# Replication materials:

# Re-examining the Effects of Western Sanctions on Democracy and Human Rights in the 21st Century
# Anton Peez
# peez@em.uni-frankfurt.de

### Important: Please note that you must use PanelMatch version 2.2.0.
###            remotes::install_version("PanelMatch", version = "2.2.0")
### Important: Please note that you need a stable internet connection to use R::WDI.
###            This package occasionally does not work for a few minutes due to the World Bank's API,
###            which will be evident from the error message.

# @knitr Tab-Lit

rm(list = ls())

# Log if needed
# sink("04-Replication-Log.txt", split = TRUE)

library(readxl)
library(kableExtra)

Studies <- read_excel("Studies.xlsx")

Studies_tab <- data.frame(
  Variable = Studies$Study,
  Concept = Studies$`IV Concept`,
  Data = Studies$`IV Data`,
  Concept = Studies$`DV Concept`,
  Data = Studies$`DV Data`,
  Start = Studies$Start,
  End = Studies$End,
  Range = "",
  Effect = Studies$Effect
) %>%
  kbl(booktabs  = T,
      align     = ('lcccccccc'),
      caption = "Selected past research findings on the effects of sanctions on democracy and human rights.",
      col.names = c("Study",
                    "Concept",
                    "Data",
                    "Concept",
                    "Data",
                    "Start",
                    "End",
                    "Range",
                    "Effect")) %>%
  kable_classic(full_width = FALSE) %>%
  add_header_above(c(" " = 1, "Treatment" = 2, "Outcome" = 2, "Timeframe" = 3, " " = 1)) %>%
  column_spec(8, image = spec_pointrange(
    x = Studies$Mid, 
    xmin = Studies$Start, 
    xmax = Studies$End,
    col = "black",
    opacity = 0.5,
    vline = 2005)
  ) %>%
  add_footnote(c("`Range' column: Dotted vertical line indicates 2005 as a rough demarcation of the increased use of targeted sanctions.",
                 "`Effect' column: `+' indicates an improvement; `=' indicates no change; `--' indicates a deterioration."),
               notation = "none") %>%
  kableExtra::group_rows("Outcome: Human rights/physical integrity rights", 1, 15) %>%
  kableExtra::group_rows("Outcome: Democracy/political rights", 16, 20) %>%
  kableExtra::group_rows("Present study -- Outcome: Democracy and human rights", 21, 21) %>%
  kable_styling(font_size = 8) %>%
  kable_styling(latex_options="scale_down")  %>%
  kableExtra::landscape()

Studies_tab

cat(Studies_tab, file = "99-Tab-01.tex")
cat(Studies_tab, file = "99-Tab-01.html")



# @knitr data-prep
# Preparing the various data sources used in this paper

rm(list = ls())

# Libraries
library(readxl)
library(readr)
library(lubridate)
library(countrycode)
library(haven)
library(ggthemes)
library(sandwich)
library(lmtest)

### IST (sanctions)

# Open
IST <- read_excel("IST_2.0_20230624_lim.xlsx")

# Clean dates
IST$start <- dmy(IST$startdate)
IST$end   <- dmy(IST$terdate)

IST$start_year <- year(IST$start)
IST$end_year   <- year(IST$end)

# Fill "2050" for ongoing episodes
IST$end_year <- ifelse(is.na(IST$end_year), 2050, IST$end_year)

# "Any sanction" variable
IST$any_sanc_YN <- 1

# Sanctions goals variables (especially Democracy and HR): Yes/No
# Refer to the codebook at doi:10.7910/DVN/SVR5W7
IST$DM_YN      <- grepl("DM", IST$goals, fixed = TRUE)
IST$HR_YN      <- grepl("HR", IST$goals, fixed = TRUE)
IST$AC_YN      <- grepl("AC", IST$goals, fixed = TRUE)
IST$CB_YN      <- grepl("CB", IST$goals, fixed = TRUE)
IST$CT_YN      <- grepl("CT", IST$goals, fixed = TRUE)
IST$DT_YN      <- grepl("DT", IST$goals, fixed = TRUE)
IST$WM_YN      <- grepl("WM", IST$goals, fixed = TRUE)
IST$NP_YN      <- grepl("NP", IST$goals, fixed = TRUE)
IST$Other_YN   <- grepl("Other", IST$goals, fixed = TRUE)

# Measures taken, e.g., TE = copmprehensive trade embargo, etc.
# Refer to the codebook at doi:10.7910/DVN/SVR5W7
IST$m_TE_YN <- grepl("TE", IST$measures, fixed = TRUE)
IST$m_CE_YN <- grepl("CE", IST$measures, fixed = TRUE)
IST$m_OE_YN <- grepl("OE", IST$measures, fixed = TRUE)
IST$m_AE_YN <- grepl("AE", IST$measures, fixed = TRUE)
IST$m_IM_YN <- grepl("IM", IST$measures, fixed = TRUE)
IST$m_FS_YN <- grepl("FS", IST$measures, fixed = TRUE)
IST$m_FB_YN <- grepl("FB", IST$measures, fixed = TRUE)
IST$m_AS_YN <- grepl("AS", IST$measures, fixed = TRUE)
IST$m_AF_YN <- grepl("AF", IST$measures, fixed = TRUE)
IST$m_TB_YN <- grepl("TB", IST$measures, fixed = TRUE)
IST$m_DS_YN <- grepl("DS", IST$measures, fixed = TRUE)

# Remove Human Trafficking from HR_YN
# Reasoning: This US sanctions program is different from the types of human rights-related sanctions covered in research on 'democratic sanctions' (e.g., von Soest and Wahman 2015). This research typically focuses on directly state sponsored human rights violations. The Human Trafficking Act, which generally covers forced labor and sex trafficking, did not cover state-sponsored human trafficking until 2019. See https://www.state.gov/what-is-trafficking-in-persons/ (last accessed 03 July 2025).
IST$HT_YN       <- grepl("Human Trafficking Act", IST$comment, fixed = TRUE)
IST$HR_YN       <- ifelse((IST$HT_YN == TRUE), FALSE, IST$HR_YN)

# Final "Democrtaic Sanction" variable (= HR and/or DM)
IST$Dem_Sanc_YN <- ifelse((IST$DM_YN == TRUE | IST$HR_YN == TRUE), 1, 0)

# Final "Any non-'Democratic Sanction'" variable 
IST$any_nonDem_sanc <- ifelse((IST$AC_YN == TRUE | 
                               IST$CB_YN == TRUE |   
                               IST$CT_YN == TRUE |
                               IST$DT_YN == TRUE |
                               IST$WM_YN == TRUE |
                               IST$NP_YN == TRUE |
                               IST$HT_YN == TRUE |
                               IST$Other_YN), 1, 0)

# Senders: Keep only EU/EEC and US and UN
# Von Soest and Wahman only include US and EU, but UN sanctions by definition require US, FRA, GBR support.
# See the manuscript for a discussion and the appendix for the results when excluding UN sanctions.
IST <- IST %>% dplyr::filter(sender == 2 | sender == 1653 | sender == 1830 | sender == 4400)

# Assign target country names and V-Dem codes
# IST uses COW
IST$target_vdem <- countrycode(IST$target, "cown", "vdem")
# "Some values were not matched unambiguously: 54, 57, 58, 80, 345, 990."
# Assign them manually. These are:

# COW   Country name        V-Dem
#  54   Dominica            None
#  57   St. Vincent + G.    None
#  58   Antigua + Barbuda   None
#  80   Belize              None
# 345   Yugoslavia/Serbia   198
# 990   Samoa               None

# Assign Serbia
IST$target_vdem <- ifelse(IST$target == 345, IST$target_vdem == 198, IST$target_vdem)

# Create country-year data from episode data
IST_CY <- IST %>%
  dplyr::group_by(caseid) %>%                                   # Index = episode ID
  dplyr::mutate(year = list(c(start_year:end_year))) %>%
  tidyr::unnest(year)

# Cut everything after 2021 (had added "2050" as end for episodes ongoing as of 2021.)
IST_CY <- IST_CY %>% subset(year < 2022)

# Drop US ICC sanctions
# Reasoning: The US ICC sanctions account for 45 cases are represent a very particular policy in the 2000s (see Kelley 2008 APSR).
# In any case, they are not democracy- or human rights-related, so not immediately relevant to this study.
IST_CY <- IST_CY %>%  dplyr::filter(!grepl("ICC", comment))



### V-Dem

# Open and cut down
vdem <- read_csv("V-Dem-CY-Full+Others-v13.csv")
vdem_lim  <- vdem %>% dplyr::filter(year > 1988)
vdem_lim2 <- vdem_lim[, c('country_name',
                          'country_text_id',
                          'country_id',
                          'year',
                          'COWcode',
                          'v2x_polyarchy',
                          'v2x_libdem',
                          'v2x_partipdem',
                          'v2x_delibdem',
                          'v2x_egaldem',
                          'v2x_clphy',
                          'v2elfrfair',         # Free/fair elections
                          'v2elfrfair_osp',     # Free/fair elections -- 0--4 scale
                          'v2xel_frefair',      # Free/fair elections -- general index
                          'v2elwestmon',        # Fraud allegations by Western monitors
                          'v2elmonden',         # Int monitors denied; 1 = Y, 0 = N
                          'v2elmonref',         # Int monitors self-refused; 1 = Y, 0 = N
                          'v2elvotbuy',         # Vote buying
                          'v2elvotbuy_osp',     # Vote buying -- 0--4 scale
                          'v2elasmoff',         # Did election winner assume office?
                          'v2cagenmob',         # Mass Mobilization
                          'v2caconmob',
                          'v2cademmob',         # Mass mobilization for democracy
                          'v2caautmob',
                          'v2csreprss',
                          'v2x_neopat',
                          'v2x_regime_amb',
                          'e_civil_war',
                          'e_pop',              # Population
                          'e_gdppc',            # GDP per capita
                          'e_miinflat',         # Inflation
                          'e_fh_cl',            # Freedom House
                          'e_fh_pr',
                          'e_fh_rol',
                          'e_fh_status',
                          'e_autoc')]           # Polity IV

# Generate "controversial election" variable, as von Soest/Wahman 2015 do
vdem_lim2$cont_elec <- ifelse((vdem_lim2$v2elwestmon == 1 |
                               vdem_lim2$v2elmonden  == 1 |
                               vdem_lim2$v2elmonref  == 1 
                               ), 1, 0)

vdem_lim2$cont_elec   <- ifelse(is.na(vdem_lim2$cont_elec), 0, vdem_lim2$cont_elec)



### Coups
#   Chin et. al 2021/2022 ISQ
#   https://academic.oup.com/isq/article/65/4/1040/6312785?login=true#supplementary-data
#   https://www.johnjchin.com/colpus

# Convert the list into CY format
Colpus_Data <- read_dta("Master_Coup List_all countries_basic.dta")
Colpus_Data <- Colpus_Data %>% dplyr::filter(year > 1988)

# Define the range of years
start_year <- min(Colpus_Data$year)
end_year <- max(Colpus_Data$year)

# Create a list of unique countries
countries <- unique(Colpus_Data$ccode)

# Generate all permutations of countries and years
country_years <- expand.grid(
  ccode = countries,
  year = seq(start_year, end_year)
)

# Indicator in Colpus
Colpus_Data <- Colpus_Data %>%
  dplyr::filter(success == 1) %>%
  dplyr::mutate(s_coup_occurred = 1)       # Successful coup occurred

# Merge the datasets
Colpus_Data_lim <- country_years %>%
  dplyr::left_join(Colpus_Data, by = c("ccode", "year")) %>%
  dplyr::mutate(s_coup_occurred = ifelse(is.na(s_coup_occurred), 0, s_coup_occurred))

Colpus_Data_lim$vdem <- countrycode(Colpus_Data_lim$ccode, "cown", "vdem")

Colpus_Data_lim$vdem_year <- paste(Colpus_Data_lim$vdem, Colpus_Data_lim$year, sep = "_")

# Export
write_csv(Colpus_Data_lim, "coups_1960_2021.csv")



### UNGA voting proximity

load("AgreementScoresAll_Sep2023.Rdata")

Voeten_Data <- dfAgree

# Only US dyads, and dyads w/ five largest EU states -- FRA, GER, ESP, ITA, GBR (see von Soest/Wahman 2015))
Voeten_Data <- subset(Voeten_Data, (ccode1 == 2   |
                                    ccode1 == 255 |
                                    ccode1 == 220 |
                                    ccode1 == 325 |
                                    ccode1 == 230 |
                                    ccode1 == 200 ))

# Only post-1988
Voeten_Data <- subset(Voeten_Data, year > 1988)

# Country codes to numeric
Voeten_Data$ccode2 <- as.numeric(Voeten_Data$ccode2)

# Sum/Average EU agreement
Voeten_Data_comp <- Voeten_Data %>%
  dplyr::group_by(ccode2, year) %>%
  dplyr::summarise(dist_US  = sum(IdealPointDistance[ccode1=="2"], na.rm = TRUE),    # Proximity
                   dist_EU  = mean(IdealPointDistance[ccode1 != "2"], na.rm = TRUE),
                   agree_US = sum(agree[ccode1=="2"], na.rm = TRUE),                 # Shares
                   agree_EU = mean(agree[ccode1 != "2"], na.rm = TRUE))

# Country names
Voeten_Data_comp$vdem  <- countrycode(Voeten_Data_comp$ccode2, "cown", "vdem")

#   Some values were not matched unambiguously: 260
#   Some values were not matched unambiguously: 31, 54, 55, 56, 57, 58, 60, 80, 221, 223, 232, 260, 315, 331, 338, 345, 678, 816, 835, 946, 947, 955, 970, 983, 986, 987, 990

# Mainly small states not covered by V-Dem, as above
# Assign those avaibale in V-Dem manually
Voeten_Data_comp$vdem <- ifelse(Voeten_Data_comp$ccode2 == 260, 77,  Voeten_Data_comp$vdem)
Voeten_Data_comp$vdem <- ifelse(Voeten_Data_comp$ccode2 == 315, 157, Voeten_Data_comp$vdem)
Voeten_Data_comp$vdem <- ifelse(Voeten_Data_comp$ccode2 == 345, 198, Voeten_Data_comp$vdem)
Voeten_Data_comp$vdem <- ifelse(Voeten_Data_comp$ccode2 == 678, 14,  Voeten_Data_comp$vdem)
Voeten_Data_comp$vdem <- ifelse(Voeten_Data_comp$ccode2 == 816, 34,  Voeten_Data_comp$vdem)

# Country names
Voeten_Data_comp$cname <- countrycode(Voeten_Data_comp$ccode2, "cown", "country.name")
Voeten_Data_comp$cname <- ifelse(Voeten_Data_comp$ccode2 == 260, "Germany", Voeten_Data_comp$cname)

# Create a Country-Year identifier
Voeten_Data_comp$vdem_year <- paste(Voeten_Data_comp$vdem, Voeten_Data_comp$year, sep = "_")

# US and EU average (proximity and agreement share)
# This serves as a measure of political proximity to the main Western sanctions senders
Voeten_Data_comp$dist_EU_US <- (Voeten_Data_comp$dist_EU + Voeten_Data_comp$dist_US)/2
Voeten_Data_comp$agree_EU_US <- (Voeten_Data_comp$agree_EU + Voeten_Data_comp$agree_US)/2

# Rescale this to 0--10
Voeten_Data_comp$dist_EU_US <- scales::rescale(Voeten_Data_comp$dist_EU_US, to = c(0, 10))

# Export
write_csv(Voeten_Data_comp, "UNGA_voting.csv")



### WDI

# This package accesses World Bank data via API.
# Sometimes it doesn't work due to problems on the World Bank's end. But it generally runs.
# This package is used to avoid tedious cleaning/reshaping.
# Use the World bank indicators to look up the exact details on each variables (e.g., 'BX.KLT.DINV.WD.GD.ZS').

library(WDI)

WDI_fdishare       <- WDI(indicator='BX.KLT.DINV.WD.GD.ZS', country = "all", start=1985, end=2021) # Foreign direct investment, net inflows (% of GDP)
WDI_inflation      <- WDI(indicator='FP.CPI.TOTL.ZG',       country = "all", start=1985, end=2021) # Inflation, consumer prices (annual %)
WDI_inflation_defl <- WDI(indicator='NY.GDP.DEFL.KD.ZG',    country = "all", start=1985, end=2021) # Inflation, GDP deflator (annual %)

WDI_GDP_pc         <- WDI(indicator='NY.GDP.PCAP.KD',       country = "all", start=1985, end=2021) # GDP per capita (constant 2015 US$)
WDI_GDP_growth     <- WDI(indicator='NY.GDP.MKTP.KD.ZG',    country = "all", start=1985, end=2021) # GDP growth (annual %)

WDI_GDP15          <- WDI(indicator='NY.GDP.MKTP.KD',       country = "all", start=1985, end=2021) # GDP (constant 2015 US$)
WDI_OIL            <- WDI(indicator='NY.GDP.PETR.RT.ZS',    country = "all", start=1985, end=2021) # Oil rents (% of GDP)

# Create unique Country-Year IDs
WDI_fdishare$country_year       <-  paste(WDI_fdishare$iso3c, WDI_fdishare$year, sep = "_")
WDI_inflation$country_year      <-  paste(WDI_inflation$iso3c, WDI_inflation$year, sep = "_")
WDI_inflation_defl$country_year <-  paste(WDI_inflation_defl$iso3c, WDI_inflation_defl$year, sep = "_")

WDI_GDP_pc$country_year         <-  paste(WDI_GDP_pc$iso3c, WDI_GDP_pc$year, sep = "_")
WDI_GDP_growth$country_year     <-  paste(WDI_GDP_growth$iso3c, WDI_GDP_growth$year, sep = "_")

WDI_GDP15$country_year          <-  paste(WDI_GDP15$iso3c, WDI_GDP15$year, sep = "_")
WDI_OIL$country_year            <-  paste(WDI_OIL$iso3c, WDI_OIL$year, sep = "_")

WDI_fdishare       <- WDI_fdishare       %>% dplyr::filter(iso3c != "")
WDI_inflation      <- WDI_inflation      %>% dplyr::filter(iso3c != "")
WDI_inflation_defl <- WDI_inflation_defl %>% dplyr::filter(iso3c != "")
WDI_GDP_pc         <- WDI_GDP_pc         %>% dplyr::filter(iso3c != "")
WDI_GDP_growth     <- WDI_GDP_growth     %>% dplyr::filter(iso3c != "")
WDI_GDP15          <- WDI_GDP15          %>% dplyr::filter(iso3c != "")
WDI_OIL            <- WDI_OIL            %>% dplyr::filter(iso3c != "")
  
# Merge these World Bank datasets
WDI_data <- merge(WDI_fdishare,
                  WDI_inflation,
                  by = "country_year")

WDI_data <- merge(WDI_data,
                  WDI_inflation_defl,
                  by = "country_year")

WDI_data <- merge(WDI_data,
                  WDI_GDP_pc,
                  by = "country_year")

WDI_data <- merge(WDI_data,
                  WDI_GDP_growth,
                  by = "country_year")

WDI_data <- merge(WDI_data,
                  WDI_GDP15,
                  by = "country_year")

WDI_data <- merge(WDI_data,
                  WDI_OIL,
                  by = "country_year")

# Drop duplicate indicators
WDI_data <- subset(WDI_data, select = c(country_year,
                                        country.x,
                                        iso3c.x,
                                        year.x,
                                        BX.KLT.DINV.WD.GD.ZS,
                                        FP.CPI.TOTL.ZG,
                                        NY.GDP.DEFL.KD.ZG,
                                        NY.GDP.PCAP.KD,
                                        NY.GDP.MKTP.KD.ZG,
                                        NY.GDP.MKTP.KD,   
                                        NY.GDP.PETR.RT.ZS))

WDI_data <- WDI_data %>% dplyr::rename(FDI_pct_GDP  = BX.KLT.DINV.WD.GD.ZS,
                                INF_pct      = FP.CPI.TOTL.ZG      ,
                                INF_b_pct    = NY.GDP.DEFL.KD.ZG   ,
                                GDPpc        = NY.GDP.PCAP.KD      ,
                                GDPg         = NY.GDP.MKTP.KD.ZG   ,
                                GDP15        = NY.GDP.MKTP.KD      ,
                                OIL          = NY.GDP.PETR.RT.ZS   )

# Adjust the units for some variables
WDI_data$GDPpc <- WDI_data$GDPpc/1000
WDI_data$OIL   <- WDI_data$OIL/1000000

# Drop all World Bank geographical regions.
# Drop all non-independent states.
exclude_regions <- c("AFE", "AFW", "ARB", "CEB", "CSS", "EAP", "EAR", "EAS", 
                     "ECA", "ECS", "EMU", "EUU", "FCS", "HIC", "HPC", "IBD", 
                     "IBT", "IDA", "IDB", "IDX", "INX", "LAC", "LCN", "LDC", 
                     "LIC", "LMC", "LMY", "LTE", "MEA", "MIC", "MNA", "NAC", 
                     "OED", "OSS", "PRE", "PSS", "PST", "SAS", "SSA", "SSF", 
                     "SST", "TEA", "TEC", "TLA", "TMN", "TSA", "TSS", "UMC", "WLD")

exclude_nonindep <- c("ABW", "ASM", "BMU", "CHI", "CUW", "CYM", "FRO", "GIB", 
                      "GRL", "GUM", "HKG", "IMN", "MAC", "MAF", "MNP", "NCL", 
                      "PRI", "PSE", "PYF", "SXM", "TCA", "VGB", "VIR", "XKX")

WDI_data <- WDI_data %>%
  dplyr::filter(!iso3c.x %in% exclude_regions) %>%
  dplyr::filter(!iso3c.x %in% exclude_nonindep)
# 7141 observations

# Add COW country codes
WDI_data$COWcode   <-    countrycode(WDI_data$iso3c.x, origin = "iso3c", destination = "cown")
# One value not matched unambiguously: Serbia (SRB) needs a COW country code.
# COW lists Serbia as 345 (formerly Yugoslavia), so we will do the same.
WDI_data$COWcode   <-    ifelse(WDI_data$iso3c.x=="SRB", 345, WDI_data$COWcode)

# Add VDem country codes
WDI_data$VDemcode   <-   countrycode(WDI_data$iso3c.x, origin = "iso3c", destination = "vdem")
# Some values were not matched unambiguously:
# AND, ATG, BHS, BLZ, BRN, DMA, FSM, GRD, KIR, KNA, LCA, LIE, MCO, MHL, MLT, NRU, PLW, SMR, TON, TUV, VCT, VNM, WSM
# Check individually in the following.
# Reference: https://www.v-dem.net/media/filer_public/3a/b4/3ab4f110-25c3-40b7-88c8-c600a21d91ae/v-dem_country_coding_units_v9.pdf

# Have VDem codes, but those don't show up:
# AND       Andorra         142
# BLZ       Belize          149
# BRN       Brunei          151
# LIE       Liechtenstein   172
# MCO       Monaco          182
# MLT       Malta           178
# SMR       San Marino      195
# VNM       Vietnam          35  -- South Vietnam (i.e. until 1975; VDem 35)

# So, assign these manually:
WDI_data$VDemcode   <-  ifelse(WDI_data$iso3c.x=="AND", 142, WDI_data$VDemcode)
WDI_data$VDemcode   <-  ifelse(WDI_data$iso3c.x=="BLZ", 149, WDI_data$VDemcode)
WDI_data$VDemcode   <-  ifelse(WDI_data$iso3c.x=="BRN", 151, WDI_data$VDemcode)
WDI_data$VDemcode   <-  ifelse(WDI_data$iso3c.x=="LIE", 172, WDI_data$VDemcode)
WDI_data$VDemcode   <-  ifelse(WDI_data$iso3c.x=="MCO", 182, WDI_data$VDemcode)
WDI_data$VDemcode   <-  ifelse(WDI_data$iso3c.x=="MLT", 178, WDI_data$VDemcode)
WDI_data$VDemcode   <-  ifelse(WDI_data$iso3c.x=="SMR", 195, WDI_data$VDemcode)
WDI_data$VDemcode   <-  ifelse(WDI_data$iso3c.x=="VNM",  35, WDI_data$VDemcode)

# Don't have VDem codes:
# Antigua and Barbuda, Bahamas, Dominica, Micronesia, Grenada, Kiribati,
# St. Kitts and Nevis, St. Lucia, Marsshall Islands, Nauru, Palau, Tonga,
# Tuvalu,  St. Vincent and the Grenadines, Samoa

# Final check: Are there any observations missing COW or VDem country codes?
library(dplyr)
missingCOW   <-  dplyr::filter(WDI_data, is.na(WDI_data$COWcode))
# 0 observations
missingVDem  <-  dplyr::filter(WDI_data, is.na(WDI_data$VDemcode))
# 555 observations
# Above, 15 states didn't have VDem codes.
# World Bank data covers 37 years.
# 15 x 37 = 555

# Country-Year IDs from COW and VDem country codes
WDI_data$cow_year    <-    paste(WDI_data$COWcode,  WDI_data$year.x, sep = "_"  )
WDI_data$vdem_year   <-    paste(WDI_data$VDemcode, WDI_data$year.x, sep = "_" )

# And rename the ISO3-ID variable as well, plus a few others.
names(WDI_data)[names(WDI_data) == 'country_year'] <- 'iso3c_year'

# Reorder
WDI_data <- WDI_data[c("country.x",
                       "year.x",
                       "COWcode",
                       "VDemcode",
                       "iso3c_year",
                       "cow_year",
                       "vdem_year",
                       "FDI_pct_GDP",
                       "INF_pct",
                       "INF_b_pct",
                       "GDPpc",
                       "GDPg",
                       "GDP15",
                       "OIL")]

# Calculate Oil revenues
WDI_data$OIL_rev15 <- WDI_data$GDP15 * WDI_data$OIL
# This caculates GDP (constant 2015 US$) * Oil rents (% of GDP); i.e., absolute oil revenues

# Year to numeric
WDI_data$year.x <- as.numeric(WDI_data$year.x)



### COW trade data

trade <- read.csv("Dyadic_COW_4.0.csv")
trade <- trade %>% filter(year > 1987)

# Target country codes -- USA and EU-28
target_codes <- target_codes <- c(
  305, 211, 355, 344, 352, 316, 390, 366, 375,
  220, 255, 350, 310, 205, 325, 367, 368, 212,
  338, 210, 290, 235, 360, 317, 349, 230, 380, 200, 2
)

# Step 0: Replace -9 with NA in flow1 and flow2
trade_cleaned <- trade %>%
  mutate(
    flow1 = ifelse(flow1 == -9, NA, flow1),
    flow2 = ifelse(flow2 == -9, NA, flow2)
  )

# Step 1: Compute total trade and retain country codes + names
trade_prepped <- trade_cleaned %>%
  mutate(total_flow = flow1 + flow2) %>%
  select(year, ccode1, ccode2, importer1, importer2, total_flow)

# Step 2: Duplicate each dyad so both countries appear as focal country
trade_long <- trade_prepped %>%
  rename(focal_code = ccode1, partner_code = ccode2,
         focal_name = importer1, partner_name = importer2) %>%
  bind_rows(
    trade_prepped %>%
      rename(focal_code = ccode2, partner_code = ccode1,
             focal_name = importer2, partner_name = importer1)
  )

# Step 3: Filter to only include trade with the target countries
target_codes <- c(2, 255, 200, 220, 230, 210)
trade_filtered <- trade_long %>%
  filter(partner_code %in% target_codes)

# Step 4: Summarize total trade per focal country-year (with target partners only)
country_year_trade <- trade_filtered %>%
  group_by(focal_code, focal_name, year) %>%
  summarise(total_trade_US_EU = sum(total_flow, na.rm = TRUE), .groups = "drop")

library(countrycode)

# Convert COW codes (focal_code) to V-Dem numeric codes
country_year_trade <- country_year_trade %>%
  mutate(
    vdem_code = countrycode(focal_code, origin = "cown", destination = "vdem"),
    country_year = paste0(vdem_code, "_", year)
  )

# ! Some values were not matched unambiguously: 31, 54, 55, 56, 57, 58, 60, 80, 221, 223, 232, 260, 315, 331, 345, 678, 835, 946, 947, 955, 970, 983, 986, 987, 990 
# Mostly smaller countries covered in WDI but not in V-Dem



### Human Rights Scores 

Farriss_HR <- read_csv("LHRS-v4.02-2021.csv")

Farriss_HR <- Farriss_HR %>%
  subset(YEAR > 1988)

Farriss_HR$vdem         <- countrycode(Farriss_HR$COW, "cown", "vdem")
Farriss_HR$country_name <- countrycode(Farriss_HR$COW, "cown", "country.name")

# Some values were not matched unambiguously:
#  31,  54,  55,  56,  57,  58,  60,  80, 221, 223, 232, 260, 331, 338,
# 345, 678, 816, 817, 835, 946, 947, 955, 970, 983, 986, 987, 990

# Mainly small states not covered by V-Dem, as above. Match the rest manually.
Farriss_HR$vdem <- ifelse(Farriss_HR$COW == 260, 77,  Farriss_HR$vdem)
Farriss_HR$vdem <- ifelse(Farriss_HR$COW == 315, 157, Farriss_HR$vdem)
Farriss_HR$vdem <- ifelse(Farriss_HR$COW == 345, 198, Farriss_HR$vdem)
Farriss_HR$vdem <- ifelse(Farriss_HR$COW == 678, 14,  Farriss_HR$vdem)
Farriss_HR$vdem <- ifelse(Farriss_HR$COW == 816, 34,  Farriss_HR$vdem)
Farriss_HR$vdem <- ifelse(Farriss_HR$COW == 817, 35,  Farriss_HR$vdem)

# CY identifier
Farriss_HR$vdem_year <- paste(Farriss_HR$vdem, Farriss_HR$YEAR, sep = "_")

# Drop doubles, Yemen and Germany 1990
Farriss_HR <- Farriss_HR[!(Farriss_HR$vdem_year == "14_1990" & Farriss_HR$country_name == "Yemen Arab Republic"),]
Farriss_HR <- Farriss_HR[!(Farriss_HR$vdem_year == "77_1990" & Farriss_HR$country_name == "German Federal Republic"),]



### Wars and conflict (UCDP)

### UCDP Ongoing Civil War Data
#   https://ucdp.uu.se/downloads/
#   Codebook: https://ucdp.uu.se/downloads/ucdpprio/ucdp-prio-acd-191.pdf
#   We are mostly interested in the variable "type_of_conflict", and the =3 and =4 of that.
#   1 = extrasystemic        state vs. non-state group outside its own territory...)
#   2 = interstate           state vs. state
#   3 = internal             gov't vs rebels; without foreign involvement
#   4 = international int'l  gov't vs rebels; with foreign involvement

ucdp <- read_csv("ucdp-prio-acd-211.csv")

# Types of conflict: 1, 2, 3, 4, 3 or 4 -- Yes/No
ucdp$conflict_type_1      <-    ifelse(ucdp$type_of_conflict == 1, 1, 0)
ucdp$conflict_type_2      <-    ifelse(ucdp$type_of_conflict == 2, 1, 0)

ucdp$conflict_type_3      <-    ifelse(ucdp$type_of_conflict == 3, 1, 0)
ucdp$conflict_type_4      <-    ifelse(ucdp$type_of_conflict == 4, 1, 0)

ucdp$conflict_type_3or4   <-    ifelse(ucdp$type_of_conflict >  2, 1, 0)

### Country-Year IDs

# Restrict to Civil Wars only (3 or 4), to drop some of the messy interstate war coalitions
# Restrict to 1950 onwards, like VDem above
ucdp_civil_war_only       <-    ucdp  %>%
  filter(conflict_type_3or4  ==  1 | conflict_type_2 == 1)  %>%
  filter(year > 1949)

ucdp_civil_war_only$gwno_a <- as.numeric(ucdp_civil_war_only$gwno_a)

ucdp_civil_war_only$COWcode    <-   countrycode(ucdp_civil_war_only$gwno_a,
                                                origin = "gwn",
                                                destination = "cown")

# Some values were not matched unambiguously: 55, "900, 200, 2"
# 55 is Grenada, not covered in V-Dem
# Will code the Iraq war as USA
ucdp_civil_war_only$COWcode   <-  ifelse(ucdp_civil_war_only$gwno_a=="900, 200, 2",
                                         2,
                                         ucdp_civil_war_only$COWcode)

ucdp_civil_war_only$VDemcode   <-   countrycode(ucdp_civil_war_only$gwno_a, origin = "gwn", destination = "vdem")
#   Some values were not matched unambiguously: 345, 678, "900, 200, 2" -- Final one is the Iraq War -- will set to US only (though not relevant to this study)
#   GWN     Country               VDem
#   345     Serbia                198
#   678     (North) Yemen          14
#   817     Republic of Vietnam    35
#   Assign manually:
ucdp_civil_war_only$VDemcode   <-  ifelse(ucdp_civil_war_only$gwno_a==345, 198, ucdp_civil_war_only$VDemcode)
ucdp_civil_war_only$VDemcode   <-  ifelse(ucdp_civil_war_only$gwno_a==678, 14, ucdp_civil_war_only$VDemcode)
ucdp_civil_war_only$VDemcode   <-  ifelse(ucdp_civil_war_only$gwno_a==817, 35, ucdp_civil_war_only$VDemcode)
ucdp_civil_war_only$VDemcode   <-  ifelse(ucdp_civil_war_only$gwno_a=="900, 200, 2", 20, ucdp_civil_war_only$VDemcode)

# COW to iso3c
ucdp_civil_war_only$iso3c      <-   countrycode(ucdp_civil_war_only$COWcode, origin = "cown", destination = "iso3c")
#   Some values were not matched unambiguously: 345, 678, 680
#   Assign manually:
ucdp_civil_war_only$iso3c   <-  ifelse(ucdp_civil_war_only$COWcode==345, "SRB", ucdp_civil_war_only$iso3c)
ucdp_civil_war_only$iso3c   <-  ifelse(ucdp_civil_war_only$COWcode==678, "YEM", ucdp_civil_war_only$iso3c)
ucdp_civil_war_only$iso3c   <-  ifelse(ucdp_civil_war_only$COWcode==680, "YPR", ucdp_civil_war_only$iso3c)

# Finally, country-Year IDs
ucdp_civil_war_only$cow_year   <-   paste(ucdp_civil_war_only$COWcode, ucdp_civil_war_only$year, sep = "_")
ucdp_civil_war_only$vdem_year  <-   paste(ucdp_civil_war_only$VDemcode, ucdp_civil_war_only$year, sep = "_")
ucdp_civil_war_only$iso3c_year <-   paste(ucdp_civil_war_only$iso3c, ucdp_civil_war_only$year, sep = "_")

### Next, ensure that every Country-Year only exists once -- obs in UCDP are individual conflicts, so this needs to be aggregated.
# Count duplicates to get an idea
duplictes_ucdp <- sum(duplicated(ucdp_civil_war_only$vdem_year))
# 602 duplicates

# So, aggregate UCDP data down to Country-Years and the number of civil wars (3 or 4) in a given country year
ucdp_CY  <-  ucdp_civil_war_only %>%
  group_by(vdem_year)            %>%
  transmute(ucdp_intra_conflicts = sum(conflict_type_3or4),
            ucdp_inter_conflicts = sum(conflict_type_2))

# This still includes duplicate country years.
# Keep only the unique rows.
ucdp_CY <- unique(ucdp_CY)

# Create Civil War YN variable
ucdp_CY$ucdp_civil_war_YN =
  case_when(ucdp_CY$ucdp_intra_conflicts >  0 ~ 1,
            ucdp_CY$ucdp_intra_conflicts <  1 ~ 0)

# Add years back in
library("stringr")
ucdp_CY$year <- str_sub(ucdp_CY$vdem_year, - 4, - 1)
ucdp_CY$year <- as.numeric(ucdp_CY$year)

# And write
write_csv(ucdp_CY, "ucdp_CY.csv")

# Final export: 1950 onwards
ucdp_CY_1950_2020 <- subset(ucdp_CY, year > 1950)

### Interstate war -- Side B
ucdp_inter_war_instig_1985onw       <-    ucdp  %>%
  filter(conflict_type_2 == 1)  %>%
  filter(year > 1984)

ucdp_inter_war_instig_1985onw$gwno_b <- as.numeric(ucdp_inter_war_instig_1985onw$gwno_b)

ucdp_inter_war_instig_1985onw$COWcode <- countrycode(ucdp_inter_war_instig_1985onw$gwno_b,
                                                     origin = "gwn",
                                                     destination = "cown")

ucdp_inter_war_instig_1985onw$VDemcode   <-   countrycode(ucdp_inter_war_instig_1985onw$gwno_b,
                                                          origin = "gwn",
                                                          destination = "vdem")
#   Some values were not matched unambiguously: 2, 200, 816 -- one is "2, 200"
#   GWN     Country               VDem
#   816     North Vietnam         35
#   Assign manually:
ucdp_inter_war_instig_1985onw$VDemcode   <-  ifelse(ucdp_inter_war_instig_1985onw$gwno_b==816, 35, ucdp_inter_war_instig_1985onw$VDemcode)
ucdp_inter_war_instig_1985onw$VDemcode   <-  ifelse(ucdp_inter_war_instig_1985onw$gwno_b=="2, 200", 20, ucdp_inter_war_instig_1985onw$VDemcode)

# Finally, country-Year IDs
ucdp_inter_war_instig_1985onw$vdem_year  <-   paste(ucdp_inter_war_instig_1985onw$VDemcode,
                                                    ucdp_inter_war_instig_1985onw$year,
                                                    sep = "_")

### Next, ensure that every Country-Year only exists once -- obs in UCDP are individual conflicts, so this needs to be aggregated.
# Count duplicates to get an idea
duplictes_ucdp <- sum(duplicated(ucdp_inter_war_instig_1985onw$vdem_year))
# 0 duplicates

# So, aggregate UCDP data down to Country-Years and the number of civil wars (3 or 4) in a given country year
ucdp_CY_B  <-  ucdp_inter_war_instig_1985onw %>%
  group_by(vdem_year)            %>%
  transmute(ucdp_inter_conflicts = sum(conflict_type_2))

# This still includes duplicate country years.
# Keep only the unique rows.
ucdp_CY_B <- unique(ucdp_CY_B)

test <- bind_rows(ucdp_CY_1950_2020, ucdp_CY_B)

duplictes_ucdp <- sum(duplicated(test$vdem_year))
# 25

final <- test %>%
  group_by(vdem_year) %>%
  dplyr::summarise(ucdp_inter_conflicts = sum(ucdp_inter_conflicts),
                   ucdp_intra_conflicts = sum(ucdp_intra_conflicts, na.rm = T)) %>%
  as.data.frame()

# Create Civil War YN variable
final$ucdp_civil_war_YN =
  case_when(final$ucdp_intra_conflicts >  0 ~ 1,
            final$ucdp_intra_conflicts <  1 ~ 0)

final$ucdp_inter_war_YN =
  case_when(final$ucdp_inter_conflicts >  0 ~ 1,
            final$ucdp_inter_conflicts <  1 ~ 0)

# Add years back in
library("stringr")
final$year <- str_sub(final$vdem_year, - 4, - 1)
final$year <- as.numeric(final$year)

# Export
write_csv(final, "ucdp_CY_1950_2020.csv")




### Merge IST and V-Dem

# Coutry-Year indices
IST_CY$country_year    <- paste(IST_CY$target_vdem,   IST_CY$year,    sep = "_")
vdem_lim2$country_year <- paste(vdem_lim2$country_id, vdem_lim2$year, sep = "_")

vdem_ist <- merge(vdem_lim2,
                  IST_CY,
                  by = "country_year",
                  all.x = TRUE)

# Examine duplicates
vdem_ist <- vdem_ist %>% group_by(country_year) %>%
  dplyr::mutate(sum_sanc_regimes = n(),
         sum_HR           = sum(HR_YN),
         sum_DM           = sum(DM_YN),
         sum_TE           = sum(m_TE_YN),
         sum_AF_TB        = (m_AF_YN + m_TB_YN))

# Zero if NA -- sanctions
vdem_ist$sum_HR <- ifelse(is.na(vdem_ist$sum_HR), 0, vdem_ist$sum_HR)
vdem_ist$sum_DM <- ifelse(is.na(vdem_ist$sum_DM), 0, vdem_ist$sum_DM)
vdem_ist$sum_TE <- ifelse(is.na(vdem_ist$sum_TE), 0, vdem_ist$sum_TE)
vdem_ist$sum_AF_TB <- ifelse(is.na(vdem_ist$sum_AF_TB), 0, vdem_ist$sum_AF_TB)

# Zero if NA -- elections
vdem_ist$v2elwestmon    <- ifelse(is.na(vdem_ist$v2elwestmon   ), 0, vdem_ist$v2elwestmon   )
vdem_ist$v2elfrfair_osp <- ifelse(is.na(vdem_ist$v2elfrfair_osp), 0, vdem_ist$v2elfrfair_osp)
vdem_ist$v2elfrfair     <- ifelse(is.na(vdem_ist$v2elfrfair    ), 0, vdem_ist$v2elfrfair    )

vdem_ist$v2elmonden    <- ifelse(is.na(vdem_ist$v2elmonden), 0, vdem_ist$v2elmonden) 
vdem_ist$v2elmonref    <- ifelse(is.na(vdem_ist$v2elmonref), 0, vdem_ist$v2elmonref) 
vdem_ist$v2elvotbuy    <- ifelse(is.na(vdem_ist$v2elvotbuy), 0, vdem_ist$v2elvotbuy) 
vdem_ist$v2elasmoff    <- ifelse(is.na(vdem_ist$v2elasmoff), 0, vdem_ist$v2elasmoff)

# Drop duplicate country years
vdem_ist <-  vdem_ist[!duplicated(vdem_ist$country_year), ]

# (Re-) generate some more variables
vdem_ist$DM_YN       <- ifelse(vdem_ist$sum_DM > 0, 1, 0)
vdem_ist$HR_YN       <- ifelse(vdem_ist$sum_HR > 0, 1, 0)
vdem_ist$Dem_Sanc_YN <- ifelse((vdem_ist$DM_YN == 1 | vdem_ist$HR_YN == 1), 1, 0)

vdem_ist$any_TE_YN       <- ifelse(vdem_ist$sum_TE > 0, 1, 0)
vdem_ist$any_AF_TB_YN    <- ifelse(vdem_ist$sum_AF_TB > 0, 1, 0)

# "Non-democratic sanctions" (as vS/W do)
vdem_ist$any_sanc_YN        <- ifelse(is.na(vdem_ist$any_sanc_YN), 0, vdem_ist$any_sanc_YN)

vdem_ist$any_nondem_sanc_YN <- vdem_ist$any_nonDem_sanc

vdem_ist$only_nondem_sanc_YN <- ifelse((vdem_ist$any_sanc_YN == 1 & 
                                        vdem_ist$Dem_Sanc_YN == 0), 1, 0)

# Drop individual sanctions regime information because we will only be using the aggregated information.
vdem_ist <- vdem_ist %>% 
  select(-(sender:end))

vdem_ist <- vdem_ist %>% select(-year.y)



### Merge in coups, UNGA, and FDI

final_df <- merge(vdem_ist,
                  Colpus_Data_lim,
                  by.x = "country_year",
                  by.y = "vdem_year",
                  all.x = TRUE)

# Fill NAs -- Assumption: Colpus is complete
# "Colpus_Data_lim" only contains CYs that had coup attempts
final_df$s_coup_occurred <- ifelse(is.na(final_df$s_coup_occurred), 0, final_df$s_coup_occurred)

# Continue merging
final_df <- merge(final_df,
                  final,
                  by.x = "country_year",
                  by.y = "vdem_year",
                  all.x = TRUE)

final_df <- final_df %>% select(-year.x)

final_df2 <- merge(final_df,
                  Voeten_Data_comp,
                  by.x = "country_year",
                  by.y = "vdem_year",
                  all.x = TRUE)

final_df2 <- final_df2 %>% select(-year.y)

final_df3 <- merge(final_df2,
                   WDI_data,
                   by.x = "country_year",
                   by.y = "vdem_year",
                   all.x = TRUE)

final_df4 <- merge(final_df3,
                   Farriss_HR,
                   by.x = "country_year",
                   by.y = "vdem_year",
                   all.x = TRUE)

final_df5 <- final_df4 %>%
  left_join(country_year_trade, by = c("country_year" = "country_year"))

### Fill in zeros for wars
#   Assumption: UCDP is complete
final_df5$ucdp_civil_war_YN    <- ifelse(is.na(final_df5$ucdp_civil_war_YN   ), 0, final_df5$ucdp_civil_war_YN   )
final_df5$ucdp_inter_war_YN    <- ifelse(is.na(final_df5$ucdp_inter_war_YN   ), 0, final_df5$ucdp_inter_war_YN   )
final_df5$ucdp_intra_conflicts <- ifelse(is.na(final_df5$ucdp_intra_conflicts), 0, final_df5$ucdp_intra_conflicts)
final_df5$ucdp_inter_conflicts <- ifelse(is.na(final_df5$ucdp_inter_conflicts), 0, final_df5$ucdp_inter_conflicts)

# Calculate trade dependence
final_df5$trade_dep <- final_df5$total_trade_US_EU*1000000/final_df5$GDP15

### Lags

final_df5$year.x <- substr(final_df5$country_year, nchar(final_df5$country_year) - 3, nchar(final_df5$country_year))
final_df5$year.x <- as.numeric(final_df5$year.x)

# Some first-differenced variables
final_df5 <- final_df5 %>%
  dplyr::group_by(country_id) %>%
  dplyr::mutate(lag.Dem_Sanc_YN   = dplyr::lag(Dem_Sanc_YN,
                                               n = 1,
                                               default = NA,
                                               order_by = year.x),
                lag.DM_YN   = dplyr::lag(DM_YN,
                                               n = 1,
                                               default = NA,
                                               order_by = year.x),
                lag.HR_YN   = dplyr::lag(HR_YN,
                                         n = 1,
                                         default = NA,
                                         order_by = year.x),
                lag.any_nondem_sanc_YN   = dplyr::lag(any_nondem_sanc_YN,
                                               n = 1,
                                               default = NA,
                                               order_by = year.x),
                lag.only_nondem_sanc_YN  = dplyr::lag(only_nondem_sanc_YN,
                                                      n = 1,
                                                      default = NA,
                                                      order_by = year.x),
                lag.v2x_polyarchy   = dplyr::lag(v2x_polyarchy,
                                                      n = 1,
                                                      default = NA,
                                                      order_by = year.x),
                lag.e_miinflat    = dplyr::lag(e_miinflat,
                                               n = 1,
                                               default = NA,
                                               order_by = year.x),
                lag.FDI_pct_GDP    = dplyr::lag(FDI_pct_GDP,
                                               n = 1,
                                               default = NA,
                                               order_by = year.x),
                lag.INF_pct    = dplyr::lag(INF_pct,
                                                n = 1,
                                                default = NA,
                                                order_by = year.x),
                lag.INF_b_pct    = dplyr::lag(INF_b_pct,
                                            n = 1,
                                            default = NA,
                                            order_by = year.x),
                lag.GDPpc    = dplyr::lag(GDPpc,
                                              n = 1,
                                              default = NA,
                                              order_by = year.x),
                lag.GDPg    = dplyr::lag(GDPg,
                                              n = 1,
                                              default = NA,
                                              order_by = year.x),
                lag.GDP15    = dplyr::lag(GDP15,
                                         n = 1,
                                         default = NA,
                                         order_by = year.x),
                lag.OIL_rev15    = dplyr::lag(OIL_rev15,
                                         n = 1,
                                         default = NA,
                                         order_by = year.x),
                lag.e_gdppc       = dplyr::lag(e_gdppc,
                                               n = 1,
                                               default = NA,
                                               order_by = year.x),
                lag.dist_EU_US    = dplyr::lag(dist_EU_US,
                                               n = 1,
                                               default = NA,
                                               order_by = year.x),
                lag.agree_EU_US   = dplyr::lag(agree_EU_US,
                                               n = 1,
                                               default = NA,
                                               order_by = year.x),
                lag.v2cademmob    = dplyr::lag(v2cademmob,
                                               n = 1,
                                               default = NA,
                                               order_by = year.x),
                lag.v2csreprss    = dplyr::lag(v2csreprss,
                                               n = 1,
                                               default = NA,
                                               order_by = year.x),
                lag2.v2csreprss    = dplyr::lag(v2csreprss,
                                               n = 2,
                                               default = NA,
                                               order_by = year.x),
                lag.theta_mean    = dplyr::lag(theta_mean,
                                             n = 1,
                                             default = NA,
                                             order_by = year.x),
                lag.ucdp_civil_war_YN    = dplyr::lag(ucdp_civil_war_YN,
                                               n = 1,
                                               default = NA,
                                               order_by = year.x),
                lag.ucdp_inter_war_YN    = dplyr::lag(ucdp_inter_war_YN,
                                               n = 1,
                                               default = NA,
                                               order_by = year.x),
                lag.ucdp_intra_conflicts    = dplyr::lag(ucdp_intra_conflicts,
                                               n = 1,
                                               default = NA,
                                               order_by = year.x),
                lag.ucdp_inter_conflicts    = dplyr::lag(ucdp_inter_conflicts,
                                               n = 1,
                                               default = NA,
                                               order_by = year.x),
                lag.v2x_neopat    = dplyr::lag(v2x_neopat,
                                                         n = 1,
                                                         default = NA,
                                                         order_by = year.x),
                lag.trade_dep    = dplyr::lag(trade_dep,
                                               n = 1,
                                               default = NA,
                                               order_by = year.x)
                )

### Democratic sanctions onset
final_df5$Dem_Sanc_onset_YN <- ifelse((final_df5$lag.Dem_Sanc_YN == 0 & final_df5$Dem_Sanc_YN == 1), 1, 0)
sum_onsets <- sum(final_df5$Dem_Sanc_onset_YN, na.rm = T)
# 86 total Western DM + HR democratic sanctions onsets
# See also Table A7 in the Appendix, compiled below.

final_df5$DM_onset_YN <- ifelse((final_df5$lag.DM_YN == 0 & final_df5$DM_YN == 1), 1, 0)
sum_DM_onsets <- sum(final_df5$DM_onset_YN, na.rm = T)
# 72 total Western DM sanctions onsets 

final_df5$HR_onset_YN <- ifelse((final_df5$lag.HR_YN == 0 & final_df5$HR_YN == 1), 1, 0)
sum_HR_onsets <- sum(final_df5$HR_onset_YN, na.rm = T)
# 65 total Western HR sanctions onsets 

### Democratic sanctions ongoing
final_df5$Dem_Sanc_ongoing_YN <- ifelse((final_df5$lag.Dem_Sanc_YN == 1 & final_df5$Dem_Sanc_YN == 1), 1, 0)
sum_ongoing <- sum(final_df5$Dem_Sanc_ongoing_YN, na.rm = T)
# 766 country-years with ongoing sanctions

### Changes in civil society repression
final_df5$ch_csrepress     <- final_df5$v2csreprss - final_df5$lag.v2csreprss
final_df5$lag_ch_csrepress <- final_df5$lag.v2csreprss - final_df5$lag2.v2csreprss

### Changes in human rights
final_df5$ch_hr     <- final_df5$theta_mean - final_df5$lag.theta_mean
#final_df5$lag.ch_hr <- final_df5$lag.theta_mean - final_df5$lag2.theta_mean

### Changes in democracy
final_df5$ch_dem     <- final_df5$v2x_polyarchy - final_df5$lag.v2x_polyarchy

## War onset
final_df5$intra_war_increase_YN <- ifelse((final_df5$lag.ucdp_intra_conflicts <
                                           final_df5$ucdp_intra_conflicts), 1, 0)

final_df5$inter_war_increase_YN <- ifelse((final_df5$lag.ucdp_inter_conflicts <
                                           final_df5$ucdp_inter_conflicts), 1, 0)

### Cubic polynomials
# "t, t2, andt3 since the last event in all the models." -- i.e., dem sanc _onset_; as vS/W do.

final_df5 <- final_df5 %>%
  arrange(country_id, year.x) %>%
  group_by(country_id) %>%
  dplyr::mutate(time_since_sanc = cumsum(Dem_Sanc_onset_YN==0))

final_df5$time_since_sanc_2 <- (final_df5$time_since_sanc)^2
final_df5$time_since_sanc_3 <- (final_df5$time_since_sanc)^3

write_csv(final_df5, "final_df5.csv")

# Quick descriptive table
cont_elex <- final_df5 %>% 
  dplyr::filter(year.x < 2022) %>%
  group_by(year.x) %>%
  dplyr::summarise(n             = n(),
            sum_cont_elec = sum(cont_elec),
            sum_Dem_onset = sum(Dem_Sanc_onset_YN),
            sum_ce_onset  = sum(cont_elec == 1 & Dem_Sanc_onset_YN == 1),
            share         = sum_ce_onset/sum_cont_elec)

### Censor the data for certain analyses
# i.e. drop countries w/ dem sanctions already in place
final_df5_censored <- final_df5 %>%
  dplyr::filter(year.x < 2022) %>%
  dplyr::filter(lag.Dem_Sanc_YN == 0)

write_csv(final_df5_censored, "final_df5_censored.csv")

cont_elex_list <- final_df5 %>%
  dplyr::filter(year.x < 2019 & cont_elec == 1) %>%
  select(year.x, country_name.x, v2x_regime_amb, cont_elec, lag.DM_YN, DM_YN, HR_YN, lag.Dem_Sanc_YN, Dem_Sanc_YN)

cont_elex_list_short <- cont_elex_list %>%
  select(year.x, country_name.x, cont_elec, lag.DM_YN, DM_YN, HR_YN, lag.Dem_Sanc_YN, Dem_Sanc_YN)

cont_elex_list_short_cens <- cont_elex_list_short %>%
  dplyr::filter(lag.Dem_Sanc_YN == 0 & Dem_Sanc_YN == 1)





### Appendix A1
### Illustrative figure

# @knitr Fig-03

final_df5 <- read_csv("final_df5.csv")

library(patchwork)
library(ggplot2)

Dem_by_Year <- final_df5 %>% group_by(year.x) %>%
  dplyr::summarise(mean_VDem = mean(v2x_polyarchy, na.rm = T),
            mean_HRS  = mean(theta_mean, na.rm = T))

P1_dem <- ggplot(data = Dem_by_Year, aes(x = year.x)) +
  geom_line(aes(y = mean_VDem), color = "black")+
  ylab("Average country\ndemocracy score")+
  scale_y_continuous(breaks=seq(0.35, 0.55, 0.05), limits=c(0.35, 0.55))+
  scale_x_continuous(breaks=seq(1990, 2022, 5), limits=c(1988, 2022), minor_breaks = NULL)+
  theme_clean()+
  geom_vline(xintercept = 2012, linetype = "dotted")+
  labs(subtitle = c("Democracy (V-Dem Polyarchy)"))+
  theme(axis.title.x = element_blank(),
        plot.margin = margin(0.25, 0.25, 0.25, 0.25, "cm"),
        legend.position="bottom",
        axis.text = element_text(colour = "#000000"))

P2_hr <- ggplot(data = Dem_by_Year, aes(x = year.x)) +
  geom_line(aes(y = mean_HRS), color = "black")+
  ylab("Average country\nhuman rights score")+
  scale_y_continuous(breaks=seq(-0.1, 0.75, 0.1), limits=c(-0.1, 0.75))+
  scale_x_continuous(breaks=seq(1990, 2021, 5), limits=c(1988, 2022), minor_breaks = NULL)+
  theme_clean()+
  geom_vline(xintercept = 2014, linetype = "dotted")+
  labs(subtitle = c("Human rights (Human Rights Scores)"),
       caption = c("Note the truncated Y axes.\nV-Dem (democracy) ranges from 0-1.\nHRS (human rights) ranges from -3.5-5.5.\nDotted lines indicate high points (2012 and 2014)."))+
  theme(axis.title.x = element_blank(),
        plot.margin = margin(0.25, 0.25, 0.25, 0.25, "cm"),
        legend.position="bottom",
        axis.text = element_text(colour = "#000000"))

Fig_combined_Dem_HR <- (P1_dem / P2_hr)

ggsave("Fig_combined_Dem_HR.png",
       Fig_combined_Dem_HR,
       width = 20, height = 10, units = "cm")

Fig_combined_Dem_HR

ggsave("99-Fig-A02.png", Fig_combined_Dem_HR, width = 8, height = 5.5, dpi = 300)





### Appendix A10
#   Replications of von Soest/Wahman

# @knitr Tab-03

final_df5_censored <- read_csv("final_df5_censored.csv")

library(ggthemes)

# Subset 1990-2010 censored, as in original article
final_df5_censored_90_10 <- final_df5_censored %>% subset(year.x < 2011)
final_df5_censored_90_20 <- final_df5_censored %>% subset(year.x < 2023) # --2021

# Further limit the sample: All authoritarian country-years
# Defined in vS/W as less than 7.5/10 on a combined Freedom House/Polity IV scale

# Here: V-Dem's "Regimes of the World" from "closed autocracy" to "electoral democracy" --
# This seems most similar to vS/W in final sample size (i.e. 1846 obs in dataset - 451 missing in regression = 1395)
# Original paper has in 1219/1229 in final regression in Table 3
# Perhaps < 5 would be conceptually more accurate, though?
final_df5_censored_90_20_auth <- final_df5_censored_90_20 %>% subset(v2x_regime_amb < 6)
final_df5_censored_90_10_auth <- final_df5_censored_90_10 %>% subset(v2x_regime_amb < 6)

write.csv(final_df5_censored_90_20_auth, "final_df5_censored_90_20_auth.csv")

# Number of Onsets
sum_onset_censored_90_10_auth  <- sum(final_df5_censored_90_10_auth$DM_onset_YN == 1, na.rm = T)  # with UN: 44
sum_onset_censored_90_20_auth  <- sum(final_df5_censored_90_20_auth$DM_onset_YN == 1, na.rm = T)  # with UN: 61

### Table I

logit_tab1 <- glm(Dem_Sanc_onset_YN ~
                    s_coup_occurred +
                    cont_elec +
                    ch_csrepress +
                    v2x_polyarchy +
                    lag.only_nondem_sanc_YN +     # This is in line with vS/W's phrasing and variables -- it must be non-dem, and no dem sanc
                    time_since_sanc +
                    time_since_sanc_2 +
                    time_since_sanc_3
                  ,
                  family = binomial(link='logit'),
                  data   = final_df5_censored_90_10_auth)

# summary(logit_tab1)

logit_tab1_robust_SEs <- coeftest(logit_tab1,
                                  vcov. = vcovCL(logit_tab1,
                                                 cluster = final_df5_censored_90_10_auth$country_id,
                                                 type = "HC1"))

# Pseudo-R2
library(pscl)
# pR2_logit_tab1 <- pR2(logit_tab1) # pR2 = 0.232

### Table III

final_df5_censored_96_10_auth <- final_df5_censored_90_10_auth %>% filter(year.x > 1995)

# Regular logit, 1990-2010
logit_rep <- glm(Dem_Sanc_onset_YN ~
                   s_coup_occurred +
                   cont_elec +
                   lag.only_nondem_sanc_YN +     # This is in line with vS/W's phrasing and variables -- it must be non-dem, and no dem sanc
                   v2cademmob +    
                   
                   lag.GDPg +
                   lag.INF_b_pct +
                   lag.GDPpc +
                   lag.dist_EU_US +
                   
                   lag.OIL_rev15 +
                   lag.FDI_pct_GDP +
                   
                   time_since_sanc +
                   time_since_sanc_2 +
                   time_since_sanc_3
                 ,
                 family = binomial(link='logit'),
                 data   = final_df5_censored_90_10_auth)

# summary(logit_rep)

logit_rep_robust_SEs <- coeftest(logit_rep,
                                 vcov. = vcovCL(logit_rep,
                                                cluster = final_df5_censored_90_10_auth$country_id,
                                                type = "HC1"))

# logit_rep_robust_SEs 

# pR2_logit_tab3 <- pR2(logit_rep)
# McFadden's pR2        = 0.275
# Cragg and Uhler's pR2 = 0.306

# Observations
n_obs_logit_rep <- count(na.omit(logit_rep[["model"]], cols = all.vars(f))) # // 1375
# 1375 observations;
# Original paper has 1219

# Rare events logit as a test -- 1990--2010
library(logistf)
# 'logistf' rather than 'Zelig'

z.out1 <- logistf(Dem_Sanc_onset_YN ~
                    s_coup_occurred +
                         cont_elec +
                    lag.only_nondem_sanc_YN +     # This is in line with vS/W's phrasing and variables -- it must be non-dem, and no dem sanc
                         v2cademmob +    
                         lag.GDPg +
                         lag.INF_b_pct +
                         lag.GDPpc +
                         lag.dist_EU_US +
                         #lag.OIL_rev15  +     # This variable gives an error if included, maybe something about the scale
                         lag.FDI_pct_GDP +
                         time_since_sanc +
                         time_since_sanc_2 +
                         time_since_sanc_3
                       ,
                       data = final_df5_censored_90_10_auth,
                       na.action = na.omit,
                       control = logistf.control(maxit=1000, maxstep=10)
                       )

# summary(z.out1)

# Number of included obs is 1389 (1512 in dataset)

# Extended timeframe

# Regular logit
logit_ext <- glm(Dem_Sanc_onset_YN ~
                   s_coup_occurred +
                   cont_elec +
                   lag.only_nondem_sanc_YN +     # This is in line with vS/W's phrasing and variables -- it must be non-dem, and no dem sanc
                   v2cademmob +    
                   
                   lag.GDPg +
                   lag.GDPpc +
                   lag.dist_EU_US +
                   
                   lag.OIL_rev15 +
                   lag.FDI_pct_GDP +
                   
                   time_since_sanc +
                   time_since_sanc_2 +
                   time_since_sanc_3
                 ,
                 family = binomial(link='logit'),
                 data   = final_df5_censored_90_20_auth)

# summary(logit_ext)

logit_ext_robust_SEs <- coeftest(logit_ext,
                                 vcov. = vcovCL(logit_ext,
                                                cluster = final_df5_censored_90_20_auth$country_id,
                                                type = "HC1"))

# pR2_logit_ext_tab3 <- pR2(logit_ext) #
# McFadden's pR2        = 0.190
# Cragg and Uhler's pR2 = 0.215

# Observations
n_obs_logit_ext <- count(na.omit(logit_ext[["model"]], cols = all.vars(f)))
# 2016

# Rare events logit as a test
z.out2 <- logistf(Dem_Sanc_onset_YN ~
                    s_coup_occurred +
                         cont_elec +
                         intra_war_increase_YN +
                    lag.only_nondem_sanc_YN +     # This is in line with vS/W's phrasing and variables -- it must be non-dem, and no dem sanc
                    v2cademmob +    
                         
                         lag.GDPg +
                         lag.INF_b_pct +
                         lag.GDPpc +
                         lag.dist_EU_US +
                         
                         #lag.OIL_rev15 +
                         lag.FDI_pct_GDP +
                         
                         time_since_sanc +
                         time_since_sanc_2 +
                         time_since_sanc_3
                         ,
                         data = final_df5_censored_90_20_auth,
                         na.action = na.omit,
                         control = logistf.control(maxit=1000, maxstep=10)
)

# summary(z.out2)

# Table for Appendix: Replication of vS/W reconstructed
library(modelsummary)
library(kableExtra)

# List
models <- list("Logit"    = logit_rep,
               "RE logit" = z.out1,
               "Logit"    = logit_ext,
               "RE Logit" = z.out2)

caption <- 'Replications and extensions of von Soest and Wahman 2015a; Table 3.'

var_names <- c( "(Intercept)"                    = "Constant",
                "s_coup_occurred"                   = "Coup",
                "cont_elec" = "Controversial election",
                "lag.only_nondem_sanc_YN" = "Any other sanction (t-1)",
                "v2cademmob" = "Pro-democracy protests",
                "lag.GDPg" = "GDP growth (t-1)",
                "lag.INF_b_pct" = "GDP inflation (t–1)",
                "lag.GDPpc" = "GDP per capita (t-1)",
                "lag.dist_EU_US" = "Political closeness (t-1)",
                "lag.OIL_rev15" = "Oil revenue (t-1)",
                "lag.FDI_pct_GDP" = "FDI as share of GDP (t–1)")

# Compile
table_3_models <- modelsummary(models,
                               output = 'kableExtra',
                               fmt = 3,
                               stars = c("†" = 0.1, "*" = 0.05, "**" = 0.01, "***" = 0.001),
                               coef_map = var_names,
                               title = caption
)


# Print
table_3_models <- table_3_models %>%
  add_header_above(c(" " = 1, "1990--2010 (replication)" = 2, "1990--2021 (extension)" = 2)) %>%
  add_header_above(c(" " = 1, "DV: 'democratic sanctions' onset" = 4)) %>%
  add_footnote("Cubic polynomials not displayed. Robust SEs.", notation = "none") %>%
  add_footnote("GDP per capita in thousands, Oil revenue in millions.", notation = "none") %>%
  kable_styling(font_size = 9) %>%
  kable_styling(latex_options = "HOLD_position") # %>%

table_3_models

cat(table_3_models, file = "99-Tab-A05.tex")
cat(table_3_models, file = "99-Tab-A05.html")





### Appendix A4
#   Coincidence of democracy and human rights as Western sanctions objectives

# @knitr Tab-02

final_df5 <- read.csv("final_df5.csv")

library(kableExtra)

crosstab_Dem_HR <- final_df5 %>%
  group_by(DM_YN == 1) %>%
  dplyr::summarise(sum_HR_N = sum(HR_YN == 0),
            sum_HR_Y = sum(HR_YN == 1)
            )

crosstab_Dem_HR$`DM_YN == 1` <- c("N", "Y")

Tab_A02 <- kbl(crosstab_Dem_HR,
    booktabs  = T,
    align     = ('lcc'),
    caption = "Coincidence of democracy and human rights as sanctions objectives, 1990--2021.",
    col.names = c("Democracy",
                  "N",
                  "Y")) %>%
  kable_classic(full_width = FALSE) %>%
  add_header_above(c(" " = 1, "Human rights" = 2))  %>%
  add_footnote("Observations are country-years.",
               notation = "none") %>%
  kable_styling(latex_options = "HOLD_position")

Tab_A02

cat(Tab_A02, file = "99-Tab-A02.tex")
cat(Tab_A02, file = "99-Tab-A02.html")





### Appendix A9
#   Controversial elections and coups figure

# @knitr Fig-01

# Controversial elections

library(tidyr)

final_df5 <- read_csv("final_df5.csv")

# Quick descriptive table
cont_elex_cens <- final_df5 %>%
  dplyr::filter(year.x < 2022 & year.x > 1989) %>%
  dplyr::mutate(bins = cut(year.x, breaks=c(1989, 1994, 1999, 2004, 2009, 2014, 2022))) %>%
  group_by(bins) %>%
  dplyr::summarise(n               = n(),
                   sum_cont_elec   = sum(cont_elec),
                   sum_coups       = sum(s_coup_occurred, na.rm = T),
                   sum_Dem_Sanc_onset   = sum(Dem_Sanc_onset_YN, na.rm = T),
                   sum_ce_and_onset    = sum(cont_elec == 1 & Dem_Sanc_onset_YN == 1, na.rm = T),
                   sum_ce_no_onset     = sum(cont_elec == 1 & Dem_Sanc_onset_YN == 0, na.rm = T),
                   sum_coup_and_onset  = sum(s_coup_occurred == 1 & Dem_Sanc_onset_YN == 1, na.rm = T),
                   sum_coup_no_onset  = sum(s_coup_occurred == 1 & Dem_Sanc_onset_YN == 0, na.rm = T),
                   share_ce        = sum_ce_and_onset/sum_cont_elec,
                   share_coups     = sum_coup_and_onset/sum_coups)

cont_elex_cens$bins <- as.numeric(cont_elex_cens$bins)

long <- cont_elex_cens %>% gather(bins, sum_ce_no_onset, sum_ce_and_onset)

long <- cont_elex_cens %>% select (bins, sum_ce_no_onset, sum_ce_and_onset) %>%
  pivot_longer(!bins, names_to = "event", values_to = "count")

long$bins <- ifelse(long$bins == 1, "1990-94", long$bins)
long$bins <- ifelse(long$bins == 2, "1995-99", long$bins)
long$bins <- ifelse(long$bins == 3, "2000-04", long$bins)
long$bins <- ifelse(long$bins == 4, "2005-09", long$bins)
long$bins <- ifelse(long$bins == 5, "2010-14", long$bins)
long$bins <- ifelse(long$bins == 6, "2015-21", long$bins)

long <- long[!is.na(long$bins), ]

# Stacked
cont_elex_fig <- ggplot(long, aes(fill=forcats::fct_rev(event), y=count, x=bins)) + 
  geom_bar(position="stack", stat="identity", show.legend = FALSE) +
  scale_fill_grey(start = .8, end = 0)+
  xlab(element_blank())+
  ylab("Controversial elections")+
  scale_y_continuous(breaks=seq(0, 80, 10), limits=c(0, 80))+
  annotate("text", x = 1, y = 35, hjust = 0.5, label = "3 of 16\n(19%)\nmet with\nsanctions", color = "black", lineheight = .8)+
  annotate("text", x = 2, y = 35, hjust = 0.5, label = "1 of 32\n(3%)\n\n", color = "black", lineheight = .8)+
  annotate("text", x = 3, y = 35, hjust = 0.5, label = "3 of 29\n(10%)\n\n", color = "black", lineheight = .8)+
  annotate("text", x = 4, y = 35, hjust = 0.5, label = "0 of 45\n(0%)\n\n", color = "black", lineheight = .8)+
  annotate("text", x = 5, y = 35, hjust = 0.5, label = "2 of 57\n(4%)\n\n", color = "black", lineheight = .8)+
  annotate("text", x = 6, y = 35, hjust = 0.5, label = "2 of 77\n(3%)\n\n", color = "black", lineheight = .8)+
  annotate("text", x = 2, y = 56, hjust = 0.5, label = "1 in 11 (9%)", color = "black", lineheight = .8)+
  annotate("text", x = 5, y = 56, hjust = 0.5, label = "1 in 50 (2%)", color = "black", lineheight = .8)+
  annotate("segment", x = 0.7, xend = 3.3, y = 52, yend = 52, colour = "black")+
  annotate("segment", x = 3.7, xend = 6.3, y = 52, yend = 52, colour = "black")+
  labs(caption = "Data is censored, i.e., observations with sanctions already in place are excluded.\n'Controversial election' = Monitors denied, monitors refused to participate, or allegations of fraud.\nSanctions must be democracy- or human rights-related.")+
  theme_clean()

# Coups figure

long <- cont_elex_cens %>% gather(bins, sum_coup_no_onset, sum_coup_and_onset)

long <- cont_elex_cens %>% select (bins, sum_coup_no_onset, sum_coup_and_onset) %>%
  pivot_longer(!bins, names_to = "event", values_to = "count")

long$bins <- ifelse(long$bins == 1, "1990-94", long$bins)
long$bins <- ifelse(long$bins == 2, "1995-99", long$bins)
long$bins <- ifelse(long$bins == 3, "2000-04", long$bins)
long$bins <- ifelse(long$bins == 4, "2005-09", long$bins)
long$bins <- ifelse(long$bins == 5, "2010-14", long$bins)
long$bins <- ifelse(long$bins == 6, "2015-21", long$bins)

long <- long[!is.na(long$bins), ]

# Stacked
coups_fig <- ggplot(long, aes(fill=forcats::fct_rev(event), y=count, x=bins)) + 
  geom_bar(position="stack", stat="identity", show.legend = FALSE) +
  scale_fill_grey(start = .8, end = 0)+
  xlab(element_blank())+
  ylab("Successful coups")+
  scale_y_continuous(breaks=seq(0, 15, 5), limits=c(0, 15))+
  annotate("text", x = 1, y = 8, hjust = 0.5, label = "4 of 16\n(25%)\nmet with\nsanctions", color = "black", lineheight = .8)+
  annotate("text", x = 2, y = 8, hjust = 0.5, label = "6 of 12\n(50%)\n\n", color = "black", lineheight = .8)+
  annotate("text", x = 3, y = 8, hjust = 0.5, label = "2 of 7\n(29%)\n\n", color = "black", lineheight = .8)+
  annotate("text", x = 4, y = 8, hjust = 0.5, label = "5 of 9\n(56%)\n\n", color = "black", lineheight = .8)+
  annotate("text", x = 5, y = 8, hjust = 0.5, label = "3 of 10\n(30%)\n\n", color = "black", lineheight = .8)+
  annotate("text", x = 6, y = 8, hjust = 0.5, label = "0 of 11\n(0%)\n\n", color = "black", lineheight = .8)+
  annotate("text", x = 2, y = 14, hjust = 0.5, label = "1 in 2.9 (34%)", color = "black", lineheight = .8)+
  annotate("text", x = 5, y = 14, hjust = 0.5, label = "1 in 3.8 (27%)", color = "black", lineheight = .8)+
  annotate("segment", x = 0.7, xend = 3.3, y = 13, yend = 13, colour = "black")+
  annotate("segment", x = 3.7, xend = 6.3, y = 13, yend = 13, colour = "black")+
  theme_clean()

# Combine

library(patchwork)

combined_fig <- coups_fig + cont_elex_fig

combined_fig

ggsave("99-Fig-A06.png", combined_fig, width = 11, height = 5.5, dpi = 300)





# Illustrative figure on targeted sanctions and sanction frequency

# @knitr Fig-02

library(readr)
library(ggthemes)
library(patchwork)

## IST DATA

IST <- read_csv("final_df5.csv")

crosstab_measures <- IST %>%
  group_by(year.x) %>%
  summarize(sanctioned   = sum(any_sanc_YN,  na.rm = T),
            TE_sum       = sum(sum_TE,       na.rm = T),
            TE_any       = sum(any_TE_YN,    na.rm = T),
            AF_TB_sum    = sum(sum_AF_TB,    na.rm = T),
            AF_TB_any    = sum(any_AF_TB_YN, na.rm = T),
            HR_any       = sum(HR_YN, na.rm = T),
            DM_any       = sum(DM_YN, na.rm = T),
            Dem_Sanc_any = sum(Dem_Sanc_YN, na.rm = T),
            share_TE     = TE_any/sanctioned,
            share_AF_TB  = AF_TB_any/sanctioned)

Fig_A1 <- ggplot(data = crosstab_measures, aes(x = year.x)) +
  geom_line(aes(y = share_AF_TB),    color = "black", linetype = "longdash")+
  geom_line(aes(y = share_TE), color = "black")+
  annotate("text", x = 2022.5, y = 0.5319149, hjust = 0, 
           lineheight = .8, label = "Asset freezes,\ntravel bans", color = "black")+
  annotate("text", x = 2022.5, y = 0.1276596, hjust = 0,
           lineheight = .8, label = "Embargoes,\nblockades", color = "black")+
  xlab(element_blank())+
  ylab("Share of all countries\nsanctioned by UN, EU, and/or US")+
  scale_y_continuous(labels = scales::percent_format(scale = 100), breaks=seq(0, 0.6, 0.1), limits=c(0, 0.63))+
  scale_x_continuous(breaks=c(1990, 1995, 2000, 2005, 2010, 2015, 2020),
                     limits=c(1988, 2027), minor_breaks = NULL)+
  theme_clean()

Fig_A2 <- ggplot(data = crosstab_measures, aes(x = year.x, y = sanctioned)) +
  geom_bar(fill = "black", stat = "identity") +
  xlab(element_blank())+
  ylab("Number of countries\nsanctioned by UN, EU, and/or US")+
  scale_y_continuous(breaks=seq(0, 50, 10), limits=c(0, 50))+
  scale_x_continuous(breaks=c(1990, 1995, 2000, 2005, 2010, 2015, 2020),
                     limits=c(1988, 2027), minor_breaks = NULL)+
  theme_clean()+
  labs(caption = c("Data: Attia and Grauvogel 2023.\nUS ICC-related sanctions removed."))

Fig_02_comp_AG <- (Fig_A1 / Fig_A2) + plot_annotation(tag_levels = 'A')

Fig_02_comp_AG

ggsave("99-Fig-01.png", Fig_02_comp_AG, width = 8.1, height = 5.5, dpi = 300)




### Appendix A1
#   Number of countries under 'democratic sanctions' by year

# @knitr Fig-Dem-Sanc

library(readr)
library(ggthemes)

IST <- read_csv("final_df5.csv")

crosstab_measures <- IST %>%
  group_by(year.x) %>%
  summarize(sanctioned   = sum(any_sanc_YN,  na.rm = T),
            HR_any       = sum(HR_YN, na.rm = T),
            DM_any       = sum(DM_YN, na.rm = T),
            Dem_Sanc_any = sum(Dem_Sanc_YN, na.rm = T),
            Dem_Sanc_onset = sum(Dem_Sanc_onset_YN, na.rm = T)
            )

crosstab_sum <- IST %>%
  summarize(sanctioned   = sum(any_sanc_YN,  na.rm = T),
            HR_any       = sum(HR_YN, na.rm = T),
            DM_any       = sum(DM_YN, na.rm = T),
            Dem_Sanc_any = sum(Dem_Sanc_YN, na.rm = T),
            Dem_Sanc_onset = sum(Dem_Sanc_onset_YN, na.rm = T)
  )

Fig_Dem_Sanc <- ggplot(data = crosstab_measures, aes(x = year.x, y = Dem_Sanc_any)) +
  geom_bar(fill = "black", stat = "identity") +
  xlab(element_blank())+
  ylab("Number of countries under 'democratic sanctions'\nby UN, EU, and/or US")+
  scale_y_continuous(breaks=seq(0, 35, 10), limits=c(0, 35))+
  scale_x_continuous(breaks=c(1990, 1995, 2000, 2005, 2010, 2015, 2020),   
                     limits=c(1989, 2022), minor_breaks = NULL)+
  theme_clean()+
  labs(caption = c("Data: Attia and Grauvogel 2023."))

Fig_Dem_Sanc

ggsave("99-Fig-A01.png", Fig_Dem_Sanc, width = 8, height = 4.5, dpi = 300)





#### Appendix A7: Treatment variation
#    Matching procedure via PanelMatch

# Treatment variation plot
# @knitr Fig-04

library(PanelMatch)

final_df5_90_21 <- read.csv("final_df5.csv")

# Index
final_df5_90_21$year <- as.integer(str_sub(final_df5_90_21$country_year,-4,-1))
final_df5_90_21      <- as.data.frame(final_df5_90_21)

final_df5_90_21 <- final_df5_90_21 %>% filter(year > 1988 & year < 2022)
# 5667 obs

write.csv(final_df5_90_21, "final_df5_90_21.csv")

# View the treatment and data availability
# Treatment variation plot
treat_var_plot <- DisplayTreatment(unit.id = "country_id",
                                   time.id = "year",
                                   legend.position = "none",
                                   xlab = "Year",
                                   ylab = "Countries",
                                   treatment = "Dem_Sanc_YN",
                                   data = final_df5_90_21,
                                   color.of.treated = "black",
                                   color.of.untreated = "grey",
                                   dense.plot = T) + theme_classic() +   theme(legend.position = "none",
                                                                               axis.text.y  = element_blank(),
                                                                               axis.ticks.x = element_blank(),
                                                                               axis.ticks.y = element_blank(),
                                                                               axis.text.x  = element_blank())

### Some quick descriptives
desc_IST_VDem <- final_df5_90_21 %>%
  dplyr::filter(year > 1989 & year < 2022)  %>%
  group_by(country_name.x)              %>%
  dplyr::summarize(
    n                = n(),
    years_sanc       = sum(Dem_Sanc_ongoing_YN, na.rm = T),
    years_sanc_share = years_sanc/n) %>%
  arrange(desc(years_sanc_share)) %>%
  ungroup()

# plot(treat_var_plot, cex = 1.5)

# Add total share of sanctioned years
final_df5_90_21 <- final_df5_90_21 %>%
  group_by(country_id) %>%
  filter(!is.na(DM_YN)) %>%
  dplyr::mutate(n = n(),
         sanc = sum(Dem_Sanc_YN, na.rm = T),
         share = sanc/n) %>%
  arrange(desc(share)) %>%
  ungroup()

### As a more flexible ggplot
## Needs to be geom_tile rather than geom_raster

library(ggthemes)
library(forcats)

# Colors
colors <- c('#666666', 'black')
b      <- c(0, 1)

# COuntry name as factor
final_df5_90_21$country_id_fac <- as.factor(final_df5_90_21$country_id)

# Order names by share of sanctioned/treated years
final_df5_90_21 <- final_df5_90_21 %>%
  dplyr::mutate(country_id_fac = fct_reorder(country_id_fac, share, .fun='median'))

# Basic plot
treat_vari_plot <- ggplot(final_df5_90_21, aes(year,
                                                country_id_fac,
                                                fill = Dem_Sanc_YN, na.rm = T)) +
  xlim(1989.5, 2025)+
  geom_tile()+
  ylab("Countries")+
  xlab(element_blank())+
  scale_fill_gradientn(colors = colors, breaks = b, labels = format(b), na.value = NA)

# PLot and add some manual annotations
treat_vari_plot <- treat_vari_plot +  theme_classic() + theme(legend.position = "none",
                                           axis.text.y  = element_blank(),
                                           axis.ticks.y = element_blank(),
                                           axis.text.x  = element_text(color="black"))+
  annotate("text", x = 2022, y = 178, label = "MMR (100%)", hjust = 0)+
  annotate("text", x = 2022, y = 168, label = "LBR (81%)", hjust = 0)+
  annotate("text", x = 2022, y = 158, label = "FJI (38%)", hjust = 0)+
  annotate("text", x = 2022, y = 145, label = "RUS (31%)", hjust = 0)+
  annotate("text", x = 2022, y = 130, label = "EGY (25%)", hjust = 0)+
  annotate("text", x = 2022, y = 115, label = "MLI (9%)", hjust = 0)+
  annotate("text", x = 2022, y = 70, label = "POL (0%)", hjust = 0)+
  annotate("text", x = 2022, y = 40, label = "TZN (0%)", hjust = 0)+
  annotate("text", x = 2022, y = 7, label = "URY (0%)", hjust = 0)

treat_vari_plot

# Inspect
ever_DM_sanc <- final_df5_90_21 %>% group_by(country_id) %>% dplyr::summarise(n = mean(share))
# 74/181 = 0.4088398 of states have been sanctioned w dem sanctions, roughly.

ggsave("99-Fig-A03.png", treat_vari_plot, width = 8, height = 4, dpi = 300)





### Section 4.3 in the main manuscript
#   Predicting democratic sanctions onset faithfully to vS/W's approach

# @knitr Tab-04

library(logistf)

final_df5_censored <- read_csv("final_df5_censored.csv")

library(ggthemes)

# Subset 1990-2010 censored, as in original article
final_df5_censored_90_10 <- final_df5_censored %>% subset(year.x < 2011)
final_df5_censored_90_20 <- final_df5_censored %>% subset(year.x < 2023) # --2021

# Further limit the sample: All authoritarian country-years, as vS/W do
# Defined by vS/W as less than 7.5/10 on a combined Freedom House/Polity IV scale

# Here: V-Dem's "Regimes of the World" from "closed autocracy" to "electoral democracy lower bound" --
# This seems most similar to vS/W in final sample size (i.e. 1846 obs in dataset - 451 missing in regression = 1395)
# Original paper has in 1219/1229 in final regression in Table 3
final_df5_censored_90_20_auth <- final_df5_censored_90_20 %>% subset(v2x_regime_amb < 6)
final_df5_censored_90_10_auth <- final_df5_censored_90_10 %>% subset(v2x_regime_amb < 6)

write.csv(final_df5_censored_90_20_auth, "final_df5_censored_90_20_auth.csv")

### RE Logits for manuscript
final_df5_censored_90_20_auth <- read.csv("final_df5_censored_90_20_auth.csv")

# Log GDP
final_df5_censored_90_20_auth$lag.GDPpc_log <- log(final_df5_censored_90_20_auth$lag.GDPpc)

# Rare events logit as a test -- 1990--2020
# Replication of vSW
z.out3 <- logistf(Dem_Sanc_onset_YN ~
                         s_coup_occurred +
                         cont_elec +
                         intra_war_increase_YN +
                         inter_war_increase_YN +
                         v2cademmob +    
                         lag.GDPg +
                         log(lag.GDPpc) +
                         lag.INF_b_pct +
                         lag.dist_EU_US +
                              lag.v2x_polyarchy +
                              lag.theta_mean +
                              lag_ch_csrepress +
                              lag.v2x_neopat +
                         time_since_sanc +
                         time_since_sanc_2 +
                         time_since_sanc_3
                         ,
                         data = final_df5_censored_90_20_auth,
                         na.action = na.omit,
                         control = logistf.control(maxit=1000, maxstep=10)
                         )

# modelsummary(z.out3, stars = T)

# Analysis for Reviewer 1 asking about trade linkage with the US and EU
z.out3b <- logistf(Dem_Sanc_onset_YN ~
                    s_coup_occurred +
                    cont_elec +
                    intra_war_increase_YN +
                    inter_war_increase_YN +
                    v2cademmob +    
                    lag.GDPg +
                    log(lag.GDPpc) +
                    lag.INF_b_pct +
                        lag.trade_dep +  ### 
                    lag.dist_EU_US +
                    lag.v2x_polyarchy +
                    lag.theta_mean +
                    lag_ch_csrepress +
                    lag.v2x_neopat +
                    time_since_sanc +
                    time_since_sanc_2 +
                    time_since_sanc_3
                  ,
                  data = final_df5_censored_90_20_auth,
                  na.action = na.omit,
                  control = logistf.control(maxit=1000, maxstep=10)
)

# modelsummary(z.out3b, stars = T)

# Further relevant variables

z.out4 <- logistf(Dem_Sanc_onset_YN ~
                         s_coup_occurred +
                         cont_elec +
                         intra_war_increase_YN +
                         v2cademmob +    
                         #lag.GDPg +
                         #lag.GDPpc_log +
                         lag.dist_EU_US +
                         lag.v2x_polyarchy +
                         lag.theta_mean +
                         #lag_ch_csrepress +
                         #lag.v2x_neopat +
                         time_since_sanc +
                         time_since_sanc_2 +
                         time_since_sanc_3
                         ,
                         data = final_df5_censored_90_20_auth,
                         na.action = na.omit,
                         control = logistf.control(maxit=1000, maxstep=10)
)

# modelsummary(z.out4, stars = T)

final_df5_censored_90_20_auth$t_events_count <- (final_df5_censored_90_20_auth$s_coup_occurred +
                                                 final_df5_censored_90_20_auth$cont_elec)

# sum(final_df5_censored_90_20_auth$t_events_count, na.rm = T)
# 222 events

# Refined model

z.out5 <- logistf(Dem_Sanc_onset_YN ~
                         t_events_count +   
                         intra_war_increase_YN +
                         #inter_war_increase_YN +
                         v2cademmob +    
                         #lag.GDPg +
                         #lag.INF_b_pct +
                         #lag.GDPpc_log +
                         lag.dist_EU_US +
                         #lag.OIL_rev15 +
                         #lag.FDI_pct_GDP +
                         lag.v2x_polyarchy +
                         lag.theta_mean +
                         #lag_ch_csrepress +
                         #lag.v2x_neopat +
                         time_since_sanc +
                         time_since_sanc_2 +
                         time_since_sanc_3
                         ,
                         data = final_df5_censored_90_20_auth,
                         na.action = na.omit,
                         control = logistf.control(maxit=1000, maxstep=10)
                         )

# modelsummary(z.out5, stars = T)

# Compiled table for body of paper
# Predicting the imposition of democracy- and human rights-related sanctions, 1990--2021.
# Refer to the manuscript for a substantive description of the models (created above)
library(modelsummary)
library(kableExtra)

# List
models <- list("Full"        = z.out3,
               "Limited"     = z.out4,
               "Simplified"  = z.out5)

caption <- 'Predicting the imposition of democracy- and human rights-related sanctions, 1990--2021.'

var_names <- c( "(Intercept)"                    = "Constant",
                "s_coup_occurred"                   = "Any successful coup (t)",
                "cwc_anysadcoup"                 = "Any successful anti-dem. coup (t)",
                "cont_elec" = "Controversial election (t)",
                "t_events_index" = "Trigger events index (t)",
                "t_events_count" = "Trigger events (count; t)",
                "intra_war_increase_YN" = "Increase in intrastate war (t)",
                "inter_war_increase_YN" = "Increase in interstate war (t)",
                "v2cademmob" = "Pro-democracy protests (t)",
                "lag.any_nondem_sanc_YN" = "Any other sanction (t-1)",
                "lag.trade_dep" = "Trade with US/EU (% of GDP; t-1)",
                "lag.GDPg" = "GDP growth (t-1)",
                "lag.INF_b_pct" = "GDP inflation (t–1)",
                "log(lag.GDPpc)" = "GDP per capita (log; t-1)",
                "lag.dist_EU_US" = "Political proximity (t-1)",
                "lag.OIL_rev15" = "Oil revenue (t-1)",
                "lag.FDI_pct_GDP" = "FDI as share of GDP (t–1)",
                "lag_ch_csrepress" = "Change in civil soc. repression (t-1)",
                "lag.v2x_neopat" = "Neopatrimonialism (t-1)",
                "ucdp_civil_war_YN" = "Civil war ongoing (Y/N, t)",
                "lag.v2x_polyarchy" = "Democracy (Polyarchy; t-1)",
                "lag.theta_mean" = "Human rights (HRS; t-1)"
                )

# Compile
table_1_models <- modelsummary(models,
                               output = 'kableExtra',
                               fmt = 3,
                               stars = c("†" = 0.1, "*" = 0.05, "**" = 0.01, "***" = 0.001),
                               coef_map = var_names,
                               title = caption
)

# Print
table_1_models <- table_1_models %>%
  add_header_above(c(" " = 1, "DV: `democratic sanctions' onset" = 3)) %>%
  kableExtra::footnote(general = c("Cubic polynomials not displayed. Robust SEs.",
                       "Models are Firth's bias-reduced logistic regressions."))

table_1_models

cat(table_1_models, file = "99-Tab-02.tex")
cat(table_1_models, file = "99-Tab-02.html")




### Appendix A11: Trade as a potential confounder
#   Table A6

# @knitr Tab-Trade
# Table for Reviewer 1 re: trade linkage
# Note that this is a somewhat shorter timeframe due to COW's coverage

library(logistf)

### RE Logits for body of the paper
final_df5_censored_90_20_auth <- read.csv("final_df5_censored_90_20_auth.csv")

# Log GDP
final_df5_censored_90_20_auth$lag.GDPpc_log <- log(final_df5_censored_90_20_auth$lag.GDPpc)

# List
models_R1 <- list("M1"       = z.out3b)

caption <- 'Predicting the imposition of democracy- and human rights-related sanctions, 1990--2014.'

var_names <- c( "(Intercept)"                    = "Constant",
                "s_coup_occurred"                = "Any successful coup (t)",
                "cont_elec" = "Controversial election (t)",
                "intra_war_increase_YN" = "Increase in intrastate war (t)",
                "inter_war_increase_YN" = "Increase in interstate war (t)",
                "t_events_index" = "Trigger events index (t)",
                "t_events_count" = "Trigger events (count; t)",
                "v2cademmob" = "Pro-democracy protests (t)",
                "lag.any_nondem_sanc_YN" = "Any other sanction (t-1)",
                "lag.trade_dep" = "Trade with US/EU (% of GDP; t-1)",
                "lag.GDPg" = "GDP growth (t-1)",
                "lag.INF_b_pct" = "GDP inflation (t–1)",
                "log(lag.GDPpc)" = "GDP per capita (log; t-1)",
                "lag.dist_EU_US" = "Political proximity (t-1)",
                "lag.OIL_rev15" = "Oil revenue (t-1)",
                "lag.FDI_pct_GDP" = "FDI as share of GDP (t–1)",
                "lag_ch_csrepress" = "Change in civil soc. repression (t-1)",
                "lag.v2x_neopat" = "Neopatrimonialism (t-1)",
                "ucdp_civil_war_YN" = "Civil war ongoing (Y/N, t)",
                "lag.v2x_polyarchy" = "Democracy (Polyarchy; t-1)",
                "lag.theta_mean" = "Human rights (HRS; t-1)"
)

# Compile
table_R1_models <- modelsummary(models_R1,
                               output = 'kableExtra',
                               fmt = 3,
                               stars = c("†" = 0.1, "*" = 0.05, "**" = 0.01, "***" = 0.001),
                               coef_map = var_names,
                               title = caption
)

table_R1_models <- table_R1_models %>%
  add_header_above(c(" " = 1, "DV: `democratic sanctions' onset" = 1)) %>%
  row_spec(13, background = "gray!20") %>%
  row_spec(14, background = "gray!20") %>%
  kableExtra::footnote(general = c("Cubic polynomials not displayed. Robust SEs.",
                                   "Models are Firth's bias-reduced logistic regressions."))

table_R1_models

cat(table_R1_models, file = "99-Tab-06.tex")
cat(table_R1_models, file = "99-Tab-06.html")




### Section 5.3 in the main paper
### Main result: DiD for democracy
#   Figure 2

# @knitr Fig-DM

library(ggthemes)

set.seed(123)

final_df5_90_21 <- read.csv("final_df5.csv")

# Index
final_df5_90_21$year <- as.integer(str_sub(final_df5_90_21$country_year,-4,-1))
final_df5_90_21      <- as.data.frame(final_df5_90_21)

final_df5_90_21 <- final_df5_90_21 %>% filter(year > 1988 & year < 2022)
# 5667 obs

write.csv(final_df5_90_21, "final_df5_90_21.csv")

final_df5_90_21 <- read.csv("final_df5_90_21.csv")

# Keep only a few vars
final_df5_90_21_b <- final_df5_90_21 %>% select(year,
                                      country_id,
                                      s_coup_occurred,
                                      cont_elec,
                                      intra_war_increase_YN,
                                      GDPg,
                                      dist_EU_US,
                                      v2cademmob,
                                      v2x_polyarchy,
                                      theta_mean,
                                      DM_YN,
                                      HR_YN,
                                      Dem_Sanc_YN,
                                      lag.Dem_Sanc_YN,
                                      any_nondem_sanc_YN,
                                      only_nondem_sanc_YN)

final_df5_90_21_b$t_events_count <- (final_df5_90_21$s_coup_occurred +
                                     final_df5_90_21$cont_elec)

final_df5_90_21_b <- as.data.frame(final_df5_90_21_b)

write_csv(final_df5_90_21_b, "final_df5_90_21_b.csv")

# Subset some more, for split analyses as robustness checks

final_df5_90_04_b <- final_df5_90_21_b %>% subset(year < 2005)
final_df5_05_21_b <- final_df5_90_21_b %>% subset(year > 2004)

write.csv(final_df5_90_04_b, "final_df5_90_04_b.csv")
write.csv(final_df5_05_21_b, "final_df5_05_21_b.csv")

# PanelMatch procedure

PM.results <- PanelMatch(lag = 4,   
                         time.id = "year",
                         unit.id = "country_id",
                         treatment = "Dem_Sanc_YN",
                         outcome.var = "v2x_polyarchy",
                         refinement.method = "CBPS.weight",
                         data = final_df5_90_21_b,
                         covs.formula = ~ 
                           I(lag(t_events_count, 0:4))         + # Include the year of sanctions onset, as outlined
                           I(lag(intra_war_increase_YN, 0:4))  +
                           I(lag(v2cademmob, 1:4))          +
                           I(lag(dist_EU_US, 1:4))        ,  
                         qoi = "att",
                         lead = 0:3,                          # Estimate through t+3, three on each "side" of the treatment
                         forbid.treatment.reversal = FALSE,
                         use.diagonal.variance.matrix = TRUE,
                         placebo.test = TRUE
)

# Check balance

balance <- get_covariate_balance(PM.results$att,
                                 data = final_df5_90_21_b,
                                 covariates = c(
                                   "t_events_count",  
                                   "intra_war_increase_YN",
                                   "v2cademmob",
                                   "dist_EU_US"
                                 ), 
                                 ylab = "SD",
                                 plot = F)

# A bit of naming and formatting
balance     <- as_tibble(balance)
balance     <- tibble::rownames_to_column(balance,     "Time")

balance$Time <- ifelse(balance$Time == "1", "t--4", balance$Time)
balance$Time <- ifelse(balance$Time == "2", "t--3", balance$Time)
balance$Time <- ifelse(balance$Time == "3", "t--2", balance$Time)
balance$Time <- ifelse(balance$Time == "4", "t--1", balance$Time)
balance$Time <- ifelse(balance$Time == "5", "t+0",  balance$Time)

write_csv(balance, "balance_dem.csv")

# Results
PE.results <- PanelEstimate(sets = PM.results,
                            number.iterations = 1000,
                            data = final_df5_90_21_b)

#PE.results[["estimates"]]
#plot(PE.results)
#summary(PE.results)

capture <- capture.output(results_table <- summary(PE.results))

results_table <- results_table[["summary"]]
results_table <- as.data.frame(results_table)
results_table <- tibble::rownames_to_column(results_table,     "Time")

results_table$lower <- results_table$`2.5%`
results_table$upper <- results_table$`97.5%`

# Manually calculate 95% CIs just to be safe and to double-check
results_table$man_lower <- results_table$estimate - (1.96*results_table$std.error)
results_table$man_upper <- results_table$estimate + (1.96*results_table$std.error)
# These are very similar

# Levels of t, so ggplot doesn't switch to alphabetical
results_table$Time <- as.character(results_table$Time)
results_table$Time <- factor(results_table$Time, levels=unique(results_table$Time))

# Add a zero for t-1, as Cunnigham has and previous versions of PanelMatch had -- for ease of interpretation
results_table <- results_table %>% add_row(Time = "t-1",
                                           estimate = 0,
                                           .before = 1)

# Plot
results_plot_dem <- ggplot(results_table,
                       aes(Time, estimate, group = 1))+
  geom_hline(yintercept=0) +
  geom_vline(
    aes(xintercept =  stage(Time, after_scale = 1.5)),
    linetype = "dashed") +
  geom_line()+
  geom_errorbar(width=.1, aes(ymin = lower, ymax = upper)) +
  geom_point(shape=21, size=3, fill = "white") +
  scale_y_continuous(limits = c(-0.135, 0.12),
                     breaks = seq(-0.12, 0.12, by = 0.02),
                     labels = scales::number_format(accuracy = 0.01))+
  annotate("text", x = 1.4, y= 0.05, label = "Sanctions imposition", hjust = 0)+
  labs(y = "Estimated Difference in Democracy\n(V-Dem Polyarchy, 0-1)",
       x = element_blank()
  ) +
  theme_clean()

### Placebo tests and pre-treatment -- Democracy outcome

# https://github.com/insongkim/PanelMatch/issues/92
#remove.packages("PanelMatch")
#library(devtools)
#install_github("insongkim/PanelMatch", ref = "se_comparison", dependencies=TRUE)

set.seed(123)

placebo_tab <- placebo_test(PM.results, data = final_df5_90_21_b, number.iterations = 1000, plot = F, confidence.level = 0.95)

dm_estimates <- placebo_tab[["bootstrapped.estimates"]]
dm_estimates <- as.data.frame(dm_estimates)

dm_estimates_mean_t4 <- mean(dm_estimates$`t-4`)
dm_estimates_mean_t3 <- mean(dm_estimates$`t-3`)
dm_estimates_mean_t2 <- mean(dm_estimates$`t-2`)

# https://github.com/insongkim/PanelMatch/issues/132#issuecomment-1795026977
# install_github("insongkim/PanelMatch", dependencies=TRUE, ref = "se_comparison")

pt <- placebo_test(PM.results, data = final_df5_90_21_b, se.method = "unconditional", plot = FALSE)

ses_past <- pt[["standard.errors"]]

# Create table
dm_past_estimates <- data.frame(
                                Time = c("t-4", "t-3", "t-2"),
                                estimate = c(dm_estimates_mean_t4, dm_estimates_mean_t3, dm_estimates_mean_t2),
                                std.error = ses_past,
                                `2.5%` = NA,
                                `97.5%` = NA,
                                lower = NA,
                                upper = NA,
                                man_lower = NA,
                                man_upper = NA
                              )

# Manually calculate 95% CIs just to be safe and to double-check, as above --
dm_past_estimates$man_lower <- dm_past_estimates$estimate - (1.96*dm_past_estimates$std.error)
dm_past_estimates$man_upper <- dm_past_estimates$estimate + (1.96*dm_past_estimates$std.error)

# These indeed seem to be identical to those in the figure

dm_past_estimates$lower <- dm_past_estimates$man_lower
dm_past_estimates$upper <- dm_past_estimates$man_upper

# Merge
# Add these to the t+1, t+2,... estimates table created above
estimates_full <- rbind(dm_past_estimates, setNames(results_table, names(dm_past_estimates)))
estimates_full$Time <- factor(estimates_full$Time, levels = c("t-4", "t-3", "t-2", "t-1", "t+0", "t+1", "t+2", "t+3"))

# Plot the whole thing
results_plot_dem <- ggplot(estimates_full,
                           aes(Time, estimate, group = 1))+
  geom_hline(yintercept=0) +
  geom_vline(
    aes(xintercept =  stage(Time, after_scale = 4.5)),
    linetype = "dashed") +
  geom_line()+
  geom_errorbar(width=.1, aes(ymin = lower, ymax = upper)) +
  geom_point(shape=21, size=3, fill = "white") +
  scale_y_continuous(limits = c(-0.135, 0.12),
                     breaks = seq(0.12, -0.125, by = -0.02),
                     labels = scales::number_format(accuracy = 0.01))+
  annotate("text", x = 4.35, y= 0.05, label = "Sanctions imposition", hjust = 1)+
  labs(y = "Estimated Difference in Democracy\n(V-Dem Polyarchy, 0-1)",
       x = element_blank()
  ) +
  theme_clean()

results_plot_dem

ggsave("99-Fig-02.png",
       results_plot_dem,
       width = 7, height = 2.8)



### Section 5.3 in the main paper
### DiD for human rights
#   Figure 3

# @knitr Fig-HR

set.seed(123)

final_df5_90_21 <- read.csv("final_df5_90_21_b.csv")

PM.results <- PanelMatch(lag = 4,  
                         time.id = "year",
                         unit.id = "country_id",
                         treatment = "Dem_Sanc_YN",
                         outcome.var = "theta_mean",
                         refinement.method = "CBPS.weight",
                         data = final_df5_90_21,
                         covs.formula = ~ 
                           I(lag(t_events_count, 0:4))         + # Include the year of sanctions onset, as outlines
                           I(lag(intra_war_increase_YN, 0:4))  +
                           I(lag(v2cademmob, 1:4))          +
                           I(lag(dist_EU_US, 1:4))        ,  
                         qoi = "att",
                         lead = 0:3,                          # Three on each "side" of the treatment
                         forbid.treatment.reversal = FALSE,
                         use.diagonal.variance.matrix = TRUE,
                         placebo.test = TRUE
)

# Check balance
balance <- get_covariate_balance(PM.results$att,
                                 data = final_df5_90_21,
                                 covariates = c(
                                   "t_events_count",  
                                   "intra_war_increase_YN",
                                   "v2cademmob",
                                   "dist_EU_US"
                                 ), 
                                 ylab = "SD",
                                 plot = F)

# A bit of naming and formatting
balance     <- as_tibble(balance)
balance     <- tibble::rownames_to_column(balance,     "Time")

balance$Time <- ifelse(balance$Time == "1", "t--3", balance$Time)
balance$Time <- ifelse(balance$Time == "2", "t--2", balance$Time)
balance$Time <- ifelse(balance$Time == "3", "t--1", balance$Time)
balance$Time <- ifelse(balance$Time == "4", "t",    balance$Time)

write_csv(balance, "balance_hr.csv")

balance_hr <- balance %>%      # Balance is identical to Dem outcome table
  mutate_all(linebreak) %>%
  knitr::kable(caption = "Weighted/refined/balanced control groups: Covariate balance from t--3 to t, outcome: human rights.",
               booktabs = T,
               escape   = F,
               digits   = 2,
               align = ('lccccc'),
               col.names = c("Time",
                             "Trigger Events",
                             "Intrastate war increase",
                             "Pro-dem. protest",
                             "Pol. proximity"
               )) %>%
  kable_styling(font_size = 9) %>%
  add_footnote(c("Trigger Events and Conflict Increases used for weighting at t--4 to t0, all others at t--4 to t--1.",
                 "Standardized mean differences, in SDs."), notation="none")

# Results
PE.results <- PanelEstimate(sets = PM.results,
                            number.iterations = 1000,
                            data = final_df5_90_21)

#PE.results[["estimates"]]
#plot(PE.results)
#summary(PE.results)

capture <- capture.output(results_table <- summary(PE.results))

results_table <- results_table[["summary"]]
results_table <- as.data.frame(results_table)
results_table <- tibble::rownames_to_column(results_table,     "Time")

results_table$lower <- results_table$`2.5%`
results_table$upper <- results_table$`97.5%`

# Manually calculate 95% CIs just to be safe and to double-check
results_table$man_lower <- results_table$estimate - (1.96*results_table$std.error)
results_table$man_upper <- results_table$estimate + (1.96*results_table$std.error)

# Levels of t, so ggplot doesn't switch to alphabetical
results_table$Time <- as.character(results_table$Time)
results_table$Time <- factor(results_table$Time, levels=unique(results_table$Time))

# Add a zero for t-1, as Cunningham has and previous versions of PanelMatch had -- for ease of interpretation
results_table <- results_table %>% add_row(Time = "t-1",
                                           estimate = 0,
                                           .before = 1)

# Plot
results_plot_hr <- ggplot(results_table,
                           aes(Time, estimate, group = 1))+
  geom_hline(yintercept=0) +
  geom_vline(
    aes(xintercept =  stage(Time, after_scale = 1.5)),
    linetype = "dashed") +
  geom_line()+
  geom_errorbar(width=.1, aes(ymin = lower, ymax = upper)) +
  geom_point(shape=21, size=3, fill = "white") +
  scale_y_continuous(limits = c(-0.5, 0.5),
                     breaks = seq(-0.5, 0.5, by = 0.1),
                     labels = scales::number_format(accuracy = 0.1))+
  annotate("text", x = 1.4, y= 0.25, label = "Sanctions imposition", hjust = 0)+
  labs(y = "Estimated Difference in Human Rights\n(HRS, -3.5-5.5)",
       x = element_blank()
  ) +
  theme_clean()

### Placebo tests and pre-treatment -- HR

set.seed(123)

placebo_tab <- placebo_test(PM.results, data = final_df5_90_21, number.iterations = 1000, plot = F, confidence.level = 0.95)

hr_estimates <- placebo_tab[["bootstrapped.estimates"]]
hr_estimates <- as.data.frame(hr_estimates)

hr_estimates_mean_t4 <- mean(hr_estimates$`t-4`)
hr_estimates_mean_t3 <- mean(hr_estimates$`t-3`)
hr_estimates_mean_t2 <- mean(hr_estimates$`t-2`)

# https://github.com/insongkim/PanelMatch/issues/132#issuecomment-1795026977
# install_github("insongkim/PanelMatch", dependencies=TRUE, ref = "se_comparison")

pt <- placebo_test(PM.results, data = final_df5_90_21, se.method = "unconditional", plot = FALSE)

ses_past <- pt[["standard.errors"]]

# Create table
hr_past_estimates <- data.frame(
  Time = c("t-4", "t-3", "t-2"),
  estimate = c(hr_estimates_mean_t4, hr_estimates_mean_t3, hr_estimates_mean_t2),
  std.error = ses_past,
  `2.5%` = NA,
  `97.5%` = NA,
  lower = NA,
  upper = NA,
  man_lower = NA,
  man_upper = NA
)

# Manually calculate 95% CIs just to be safe and to double-check, as above --

hr_past_estimates$man_lower <- hr_past_estimates$estimate - (1.96*hr_past_estimates$std.error)
hr_past_estimates$man_upper <- hr_past_estimates$estimate + (1.96*hr_past_estimates$std.error)

# These indeed seem to be identical to those in the figure

hr_past_estimates$lower <- hr_past_estimates$man_lower
hr_past_estimates$upper <- hr_past_estimates$man_upper

# Merge
# Add these to the t+1, t+2,... estimates table created above
estimates_full <- rbind(hr_past_estimates, setNames(results_table, names(hr_past_estimates)))
estimates_full$Time <- factor(estimates_full$Time, levels = c("t-4", "t-3", "t-2", "t-1", "t+0", "t+1", "t+2", "t+3"))

# Plot the whole thing
results_plot_hr <- ggplot(estimates_full,
                          aes(Time, estimate, group = 1))+
  geom_hline(yintercept=0) +
  geom_vline(
    aes(xintercept =  stage(Time, after_scale = 4.5)),
    linetype = "dashed") +
  geom_line()+
  geom_errorbar(width=.1, aes(ymin = lower, ymax = upper)) +
  geom_point(shape=21, size=3, fill = "white") +
  scale_y_continuous(limits = c(-0.5, 0.5),
                     breaks = seq(-0.5, 0.5, by = 0.1),
                     labels = scales::number_format(accuracy = 0.1))+
  annotate("text", x = 4.35, y= 0.25, label = "Sanctions imposition", hjust = 1)+
  labs(y = "Estimated Difference in Human Rights\n(HRS, -3.5-5.5)",
       x = element_blank()
  ) +
  theme_clean()

results_plot_hr

ggsave("99-Fig-03.png",
       results_plot_hr,
       width = 7, height = 2.8)





### Table A7 (Appendix 12): Cases of onset used in the analysis

#   @knitr app-tab-list

# Re-run the analysis just to get the matched sets
set.seed(123)
final_df5_90_21 <- read.csv("final_df5_90_21_b.csv")

final_df5_90_21$Dem_Sanc_Onset <- ifelse(final_df5_90_21$Dem_Sanc_YN == 1 & final_df5_90_21$lag.Dem_Sanc_YN == 0, 1, 0)

PM.results <- PanelMatch(lag = 4,                           # Same exact procedure as above to reproduce the matches
                         time.id = "year",
                         unit.id = "country_id",
                         treatment = "Dem_Sanc_YN",
                         refinement.method = "CBPS.weight",
                         data = final_df5_90_21,
                         covs.formula = ~ 
                           I(lag(t_events_count, 0:4))         +
                           I(lag(intra_war_increase_YN, 0:4))  +
                           I(lag(v2cademmob, 1:4))          +
                           I(lag(dist_EU_US, 1:4))        ,  
                         qoi = "att",
                         outcome.var = "theta_mean",
                         lead = 0:3,                        
                         forbid.treatment.reversal = FALSE,
                         use.diagonal.variance.matrix = TRUE,
                         placebo.test = TRUE
                         )

# Results
PE.results <- PanelEstimate(sets = PM.results,
                            number.iterations = 1000,
                            data = final_df5_90_21)

# Examine these a bit more
quiet <- function(x) {
  sink(tempfile())
  on.exit(sink())
  invisible(force(x))
}

matched_sets_inspect <- quiet(print(PE.results[["matched.sets"]]))

matched_sets_inspect$cname <- countrycode(matched_sets_inspect[, 1],
                                          origin = "vdem",
                                          destination = "country.name")

matched_sets_inspect <- matched_sets_inspect %>% arrange(year)

matched_sets_inspect_simple <- matched_sets_inspect[c("cname","year")]

matched_sets_inspect_simple$cname <- ifelse(matched_sets_inspect_simple$cname == "Congo - Kinshasa",
                                            "DR Congo",
                                            matched_sets_inspect_simple$cname)
matched_sets_inspect_simple$cname <- ifelse(matched_sets_inspect_simple$cname == "Congo - Brazzaville",
                                            "Republic of Congo",
                                            matched_sets_inspect_simple$cname)
matched_sets_inspect_simple$cname <- ifelse(matched_sets_inspect_simple$cname == "Bosnia & Herzegovina",
                                            "Bosnia and Herzegovina",
                                            matched_sets_inspect_simple$cname)

# Unmatched
final_df5_90_21$cname <- countrycode(final_df5_90_21[, 2],
                                     origin = "vdem",
                                     destination = "country.name")

unmatched_sets_inspect <- final_df5_90_21 %>%
  filter(Dem_Sanc_Onset == 1) %>%
  dplyr::select(cname, year)  %>%
  arrange(year)

unmatched_sets_inspect$cname <- ifelse(unmatched_sets_inspect$cname == "Congo - Kinshasa",
                                            "DR Congo",
                                       unmatched_sets_inspect$cname)
unmatched_sets_inspect$cname <- ifelse(unmatched_sets_inspect$cname == "Congo - Brazzaville",
                                       "Republic of Congo",
                                       unmatched_sets_inspect$cname)
unmatched_sets_inspect$cname <- ifelse(unmatched_sets_inspect$cname == "Bosnia & Herzegovina",
                                            "Bosnia and Herzegovina",
                                       unmatched_sets_inspect$cname)

# Unmatched -- Some tidying up
unmatched_sets_inspect_simple <- unmatched_sets_inspect        %>%
  mutate(unmatched_sets_inspect, count = row_number()) %>%
  dplyr::select(count, everything())

# Matched -- Some tidying up
matched_sets_inspect_simple <- matched_sets_inspect_simple        %>%
  mutate(matched_sets_inspect_simple, count = row_number()) %>%
  dplyr::select(count, everything())

# Drop obs from unmatched
unmatched_filtered <- unmatched_sets_inspect_simple %>%
  anti_join(matched_sets_inspect_simple, by = c("cname", "year")) %>%
  mutate(count = row_number()) 

unmatched_filtered <- unmatched_filtered %>%
  mutate(reason = case_when(
    year <= 1993 ~ "Too few lag years",
    year >= 2019 ~ "Too few lead years",
    TRUE ~ NA_character_  # Fill with NA for other years
  ))
  
# Append horizontally
# Determine max number of rows
max_rows <- max(nrow(matched_sets_inspect_simple), nrow(unmatched_filtered))

# Pad matched_sets_inspect_simple
matched_padded <- matched_sets_inspect_simple %>%
  slice(1:max_rows) %>%
  add_row(.rows = max_rows - nrow(matched_sets_inspect_simple))

# Pad unmatched_filtered
unmatched_padded <- unmatched_filtered %>%
  slice(1:max_rows) %>%
  add_row(.rows = max_rows - nrow(unmatched_filtered))

# Bind side-by-side
matched_horizontal <- bind_cols(matched_padded, unmatched_padded) %>% select(-`count...4`)

# Replace NA with blank strings
matched_horizontal_clean <- matched_horizontal %>%
  mutate_all(~ ifelse(is.na(.), "", as.character(.)))

# Tables
matched_horizontal_clean <- matched_horizontal_clean %>%
  knitr::kable(
    caption  = "Included and dropped cases of democracy and human rights-related sanctions onset, 1990--2021.",
    booktabs = T,
    col.names = c("\\#",
                  "Country",
                  "Year",
                  "Country",
                  "Year",
                  "Reason dropped"),
    escape   = F,
    align=('clclcc')) %>%
  add_header_above(c(" " = 1, "Included" = 2, "Dropped" = 3)) %>%
  kable_styling(font_size = 7)

matched_horizontal_clean

cat(matched_horizontal_clean, file = "99-Tab-A07.tex")
cat(matched_horizontal_clean, file = "99-Tab-A07.html")





### Summary statistics -- Table A1, Appendix A3

# @knitr tab-stats

library(vtable)
library(kableExtra)

final_df5_90_21 <- read.csv("final_df5_90_21_b.csv")

final_df5_90_21 <- final_df5_90_21 %>% filter(year > 1989)

# Summary statistics
summary_stats <- st(final_df5_90_21, out = "csv")

summary_stats$Variable <- ifelse(summary_stats$Variable == "s_coup_occurred", "Successful coup (Colpus/Chin et al.)"         , summary_stats$Variable)
summary_stats$Variable <- ifelse(summary_stats$Variable == "cont_elec", "Controversial election Y/N (V-Dem)"         , summary_stats$Variable)
summary_stats$Variable <- ifelse(summary_stats$Variable == "theta_mean", "Human rights (HRS/Fariss)"                            , summary_stats$Variable)
summary_stats$Variable <- ifelse(summary_stats$Variable == "v2x_polyarchy", "Democracy (V-Dem Polyarchy)"                     , summary_stats$Variable)
summary_stats$Variable <- ifelse(summary_stats$Variable == "intra_war_increase_YN", "Increase in intrastate war (UCDP)"         , summary_stats$Variable)
summary_stats$Variable <- ifelse(summary_stats$Variable == "v2cademmob", "Mobilization for democracy (V-Dem)"               , summary_stats$Variable)
summary_stats$Variable <- ifelse(summary_stats$Variable == "Dem_Sanc_YN", "Democratic sanctions in place Y/N (IST)"                 , summary_stats$Variable)
summary_stats$Variable <- ifelse(summary_stats$Variable == "only_nondem_sanc_YN", "Only non-democratic sanctions in place Y/N (IST)"  , summary_stats$Variable)
summary_stats$Variable <- ifelse(summary_stats$Variable == "GDPg", "GDP growth (World Bank)"  , summary_stats$Variable)
summary_stats$Variable <- ifelse(summary_stats$Variable == "t_events_count", "Trigger events count"  , summary_stats$Variable)
summary_stats$Variable <- ifelse(summary_stats$Variable == "dist_EU_US", "Political proximity to US and EU"  , summary_stats$Variable)

summary_stats$share <- as.numeric(summary_stats$N)/5667
summary_stats$share <- scales::label_percent(big.mark = ",", suffix = "\\%", accuracy = 0.1)(summary_stats$share)

summary_stats <- summary_stats               %>%
  dplyr::select(Variable, N, share, Min, Mean, Max, `Std. Dev.`) %>%
  slice(13, 16, 9, 10, 3, 4, 16, 5, 7, 8, 6)
  
comp <- final_df5_90_21[, c("Dem_Sanc_YN",
                            "only_nondem_sanc_YN",
                            "v2x_polyarchy",
                            "theta_mean",
                            "s_coup_occurred",
                            "cont_elec",
                            "t_events_count",
                            "intra_war_increase_YN",
                            "dist_EU_US",
                            "v2cademmob",
                            "GDPg"
                            )]

comp_list <- lapply(comp, na.omit)
comp_list <- lapply(comp_list, scale)

summary_stats <- dplyr::rename(summary_stats, `Coverage` = share)
summary_stats <- dplyr::rename(summary_stats, SD = `Std. Dev.`)

summary_stats$N             <- as.numeric(summary_stats$N)
summary_stats$Min           <- as.numeric(summary_stats$Min)
summary_stats$Mean          <- as.numeric(summary_stats$Mean)
summary_stats$Max           <- as.numeric(summary_stats$Max)
summary_stats$SD            <-  as.numeric(summary_stats$SD)
summary_stats$Distribution  <-  ""

summary_stats <- rename(summary_stats, "Variable (N = 5.667)" = Variable)

# Table
summary_stats <- summary_stats %>%
  mutate_all(linebreak) %>%
  knitr::kable(
    caption  = "Summary statistics for the main variables, 1990--2021.",
    booktabs = TRUE,
    escape   = FALSE,
    linesep  = "",
    digits   = c(0, 0, 1, 2, 2, 2, 2),
    align    = 'lccrrrcc'
  ) %>%
  kable_styling(latex_options = c("scale_down", "HOLD_position")) %>%
  column_spec(column = 8, image = spec_hist(
    comp_list,
    same_lim = FALSE,
    col = "black",
    border = "black",
    breaks = 12
  )) %>%
  kableExtra::group_rows("Sanctions (treatment)", 1, 2) %>%
  kableExtra::group_rows("Democracy and human rights (outcomes)", 3, 4) %>%
  kableExtra::group_rows("Confounders: trigger events", 5, 7) %>%
  kableExtra::group_rows("Confounders: other factors", 8, 11)

summary_stats

cat(summary_stats, file = "99-Tab-A01.tex")
cat(summary_stats, file = "99-Tab-A01.html")






### Table 3, main text -- Balance table

# Print the balance table from above
# @knitr Tab-balance

balance <- read_csv("balance_dem.csv") # Dem and HR are identical, see the CSVs

balance <- balance %>%
  mutate_all(linebreak) %>%
  knitr::kable(caption = "Weighted/refined/balanced control groups: Covariate balance from t--4 to t+0, both outcomes.",
               booktabs = T,
               escape   = F,
               digits   = 2,
               align = ('lccccc'),
               col.names = c("Time",
                             "Trigger Events",
                             "Intrastate war increase",
                             "Pro-dem. protest",
                             "Pol. proximity"
               )) %>%
  kable_styling(font_size = 9) %>%
  add_footnote(c("Trigger events and conflict increases used for weighting at t--4 to t+0, all others at t--4 to t--1.",
                 "Standardized mean differences, in SDs."), notation="none")

balance

cat(balance, file = "99-Tab-03.tex")
cat(balance, file = "99-Tab-03.html")




### Illustration: Burundi 2015 -- Appendix A13, Table A8

# @knitr Tab-05

library(PanelMatch)

final_df5_90_21_b <- read.csv("final_df5_90_21_b.csv")

PM.results <- PanelMatch(lag = 4,         
                         time.id = "year",
                         unit.id = "country_id",
                         treatment = "Dem_Sanc_YN",
                         refinement.method = "CBPS.weight",
                         data = final_df5_90_21_b,
                         covs.formula = ~ 
                           I(lag(t_events_count, 0:4))         +
                           I(lag(intra_war_increase_YN, 0:4))  +
                           I(lag(v2cademmob, 1:4))          +
                           I(lag(dist_EU_US, 1:4))      ,
                         qoi = "att",
                         outcome.var = "theta_mean",
                         lead = 0:3,             
                         forbid.treatment.reversal = FALSE,
                         use.diagonal.variance.matrix = TRUE,
                         placebo.test = T
)

BDI2015_matches    <- PM.results[["att"]][["69.2015"]]
test01             <- attr(BDI2015_matches,'weights')
test02             <- as.data.frame(test01)
test02             <- tibble::rownames_to_column(test02, "Country")
test02$Country     <- as.numeric(test02$Country)
test02$countryname <- countrycode(test02$Country,
                                  origin = "vdem",
                                  destination = "country.name")
test02 <- test02 %>% arrange(desc(test01))

test02$rank <- 1:nrow(test02)
test02 <- test02 %>%
  dplyr::select(rank, everything())

top10_BDI2015    <- test02 %>% slice_head(n = 10)
bottom5_BDI2015  <- test02 %>% slice_tail(n = 5)

# We are interested in t--1 here, so the 2014 values
top10_BDI2015$vdem_year   <- paste(top10_BDI2015$Country, "_2014", sep = "")
bottom5_BDI2015$vdem_year <- paste(bottom5_BDI2015$Country, "_2014", sep = "")

# Bind  together
BDI2015_ex_weights <- rbind(top10_BDI2015,
                            bottom5_BDI2015)

final_df5_90_21_b$vdem_year <- paste(final_df5_90_21_b$country_id,
                                     final_df5_90_21_b$year,
                                     sep = "_")

final_df5_90_21_b$country_name <- countrycode(final_df5_90_21_b$country_id, "vdem", "country.name")
# 128 is Palestine

vars2 <- c("vdem_year",
           "country_name",
           "year",
           "t_events_count",
           "intra_war_increase_YN",
           "v2cademmob",
           "dist_EU_US",
           "v2x_polyarchy",
           "theta_mean"
)

final_df5_90_21_b_BDI2015 <- final_df5_90_21_b[vars2]

final_df5_90_21_b_BDI2015 <- final_df5_90_21_b_BDI2015 %>%
  dplyr::group_by(country_name) %>%
  dplyr::mutate(lag.t_events_count   = dplyr::lag(t_events_count,
                                                  n = 1,
                                                  default = NA,
                                                  order_by = year))

final_df5_90_21_b_BDI2015 <- final_df5_90_21_b_BDI2015 %>%
  dplyr::group_by(country_name) %>%
  dplyr::mutate(lead.t_events_count   = dplyr::lead(t_events_count,
                                                   n = 1,
                                                   default = NA,
                                                   order_by = year))

final_df5_90_21_b_BDI2015$mean_t_events_count <- (final_df5_90_21_b_BDI2015$t_events_count + 
                                                  final_df5_90_21_b_BDI2015$lead.t_events_count)/2

final_df5_90_21_b_BDI2015 <- final_df5_90_21_b_BDI2015 %>%
  dplyr::group_by(country_name) %>%
  dplyr::filter(year < 2015 & year > 2012) %>%
  dplyr::mutate(mean_t_events_count           = mean_t_events_count,
         mean_intra_war_increase_YN    = mean(intra_war_increase_YN, na.rm = T),
         mean_v2cademmob               = mean(v2cademmob, na.rm = T),
         mean_dist_EU_US               = mean(dist_EU_US, na.rm = T),
         mean_v2x_polyarchy            = mean(v2x_polyarchy, na.rm = T),
         mean_theta_mean               = mean(theta_mean, na.rm = T)) %>%
  dplyr::filter(year == 2014)

vars2b <- c("vdem_year",
            "country_name",
            "year",
            "mean_t_events_count",
            "mean_intra_war_increase_YN",
            "mean_v2cademmob",    
            "mean_dist_EU_US" ,  
            "mean_v2x_polyarchy", 
            "mean_theta_mean"    
)

final_df5_90_21_b_BDI2015 <- final_df5_90_21_b_BDI2015[vars2b]

BDI2015_ex_table <- merge(BDI2015_ex_weights,
                          final_df5_90_21_b_BDI2015,
                          by = "vdem_year")

BDI2015_ex_table <- BDI2015_ex_table %>% arrange(as.numeric(rank))

BDI2015_only <- final_df5_90_21_b_BDI2015[final_df5_90_21_b_BDI2015$vdem_year ==  "69_2014",]
BDI2015_only$rank   <- NA
BDI2015_only$test01 <- NA
BDI2015_only$Country <- 69
BDI2015_only$country_name <- c("Burundi")   # Seemingly need to hard-code this, something is wrong w Rmd vs. PDF render
BDI2015_only$countryname <- c("Burundi")

BDI2015_ex_table$rank <- as.numeric(BDI2015_ex_table$rank)
BDI2015_only$test01   <- as.numeric(BDI2015_only$test01)

# Bind together
BDI2015_ex_table <- rbind(BDI2015_only, BDI2015_ex_table)

# Cut down
vars3 <- c("rank",
           "country_name",
           "test01",
           "mean_t_events_count",
           "mean_intra_war_increase_YN",
           "mean_v2cademmob",   
           "mean_dist_EU_US"   ,
           "mean_v2x_polyarchy", 
           "mean_theta_mean"    
)

BDI2015_ex_table <- BDI2015_ex_table[vars3]

row.names(BDI2015_ex_table) <- NULL

# Function to display as percentage
percent <- function(x, digits = 1, format = "f", ...) {
  paste0(formatC(x * 100, format = format, digits = digits, ...), "\\%")
}

percent2 <- function(x, digits = 0, format = "f", ...) {
  paste0(formatC(x * 1, format = format, digits = digits, ...), "\\%")
}

# Change display
BDI2015_ex_table$test01  <- percent(as.numeric(BDI2015_ex_table$test01))
BDI2015_ex_table$test01 <- ifelse(BDI2015_ex_table$country_name   == "Burundi",
                                  "--",
                                  BDI2015_ex_table$test01)
BDI2015_ex_table$rank <- ifelse(BDI2015_ex_table$country_name   == "Burundi" ,
                                "--",
                                BDI2015_ex_table$rank)

# Print the table

BDI2015_ex_table_AP <- BDI2015_ex_table %>%
  knitr::kable(caption = "Illustration for democratic sanctions onset: Treated group and control group with weights for 2015.",
               booktabs = T,
               linesep = "",
               escape   = F,
               digits   = 2,
               align = ('rlccccccc'),
               col.names = c("Rank",
                             "Country",
                             "Weight",
                             "Trigger Events/Year (Y/N)",
                             "Increase in Civil War/Year (Y/N)",
                             "Pro-Dem. Protest",
                             "Political Proximity US/EU",
                             "Democracy",
                             "Human Rights"
               )) %>%
  row_spec(1, hline_after = T)  %>%
  row_spec(11, hline_after = T) %>%
  kableExtra::group_rows("Treated: Burundi, democratic sanctions onset in 2015", 1, 1)     %>%
  kableExtra::group_rows("Control group: Countries most and least likely to be sanctioned in 2015, and not in fact sanctioned", 2, 15)     %>%
  add_header_above(c(" " = 7, "Outcomes (not used for weighting)" = 2)) %>%
  kable_styling(font_size = 9) %>%
  footnote(general = c("Values are averages for the following timeframes, as indicated in the body text:",
                       "Trigger events: 2014--15, all others: 2013--14",
                       "Outcome variables are only displayed descriptively and are not used for refinement/weighting.",
                       "Trigger events: binary variable for t and t--1",
                       "Increase in civil war: binary variable for t and t--1",
                       "Pro-democracy protest: roughly -4 (none) to 4 (widespread)",
                       "Political proximity: 0 (very close) to 10 (very different)",
                       "Democracy: 0 (very undemocratic) to 1 (very democratic)",
                       "Human rights: -3 (poor) to +6 (respected)")) %>%
  kableExtra::landscape()

BDI2015_ex_table_AP

cat(BDI2015_ex_table_AP, file = "99-Tab-A08.tex")
cat(BDI2015_ex_table_AP, file = "99-Tab-A08.html")





### Appendix 14: Placebo tests -- non-democracy-related sanctions onset

# Only non-HR/DEM sanctions -- are the effects still negative for DEM and HR?
# Table A9

# Outcome: DEM

# @knitr app-placebo-dem-bal

PM.results <- PanelMatch(lag = 4,                       
                         time.id = "year",
                         unit.id = "country_id",
                         treatment = "only_nondem_sanc_YN",
                         refinement.method = "CBPS.weight",
                         data = final_df5_90_21_b,
                         covs.formula = ~ 
                           #I(lag(t_events_count, 0:4))         +
                           I(lag(intra_war_increase_YN, 0:4))  +
                           #I(lag(v2cademmob, 1:4))          +
                           I(lag(dist_EU_US, 1:4))        ,  
                         qoi = "att",
                         outcome.var = "v2x_polyarchy",       # Democracy outcome
                         lead = 0:3,                         
                         forbid.treatment.reversal = FALSE,
                         use.diagonal.variance.matrix = TRUE,
                         placebo.test = TRUE
)

PE.results <- PanelEstimate(sets = PM.results,
                            number.iterations = 1000,
                            data = final_df5_90_21_b)

# Check the balance
balance <- get_covariate_balance(PM.results$att,
                                 data = final_df5_90_21_b,
                                 covariates = c(
                                   #"intra_war_increase_YN",
                                   #"v2cademmob",
                                   "dist_EU_US",
                                   "v2x_polyarchy",   # Outcome variable of interest
                                   "theta_mean"       # Outcome variable of interest
                                 ),                   ## Error: unused argument (simplify = TRUE)
                                 ylab = "SD",
                                 plot = F)

# A bit of naming and formatting
balance     <- as_tibble(balance)
balance     <- tibble::rownames_to_column(balance,     "Time")

balance$Time <- ifelse(balance$Time == "1", "t--4", balance$Time)
balance$Time <- ifelse(balance$Time == "2", "t--3", balance$Time)
balance$Time <- ifelse(balance$Time == "3", "t--2", balance$Time)
balance$Time <- ifelse(balance$Time == "4", "t--1", balance$Time)
balance$Time <- ifelse(balance$Time == "5", "t",    balance$Time)

balance <- balance %>%
  mutate_all(linebreak) %>%
  knitr::kable(caption = "Placebo tests: Weighted/refined/balanced control groups, covariate balance from t--4 to t+0, both outcomes.",
               booktabs = T,
               escape   = F,
               digits   = 2,
               align = ('lccccc'),
               col.names = c("Time",
                             "Pol. proximity",
                             "Polyarchy (V-Dem)",
                             "Human rights (HRS)"
               )) %>%
  add_header_above(c(" " = 2, "Outcomes (not used for weighting)" = 2)) %>%
  kable_styling(font_size = 9) %>%
  add_footnote(c("Political proximity used at t--4 to t--1; democracy and human rights not used for weighting.",
                 "Standardized mean differences, in SDs."), notation="none")

balance

cat(balance, file = "99-Tab-A09.tex")
cat(balance, file = "99-Tab-A09.html")


### Appendix 14: Placebo tests -- non-democracy-related sanctions onset
#   Figure A8

# @knitr app-placebo-dem

# plot <- plot(PE.results)

#PE.results[["estimates"]]
#plot(PE.results)
#summary(PE.results)

capture <- capture.output(results_table <- summary(PE.results))

results_table <- results_table[["summary"]]
results_table <- as.data.frame(results_table)
results_table <- tibble::rownames_to_column(results_table,     "Time")

results_table$lower <- results_table$`2.5%`
results_table$upper <- results_table$`97.5%`

# Manually calculate 95% CIs just to be safe and to double-check
results_table$man_lower <- results_table$estimate - (1.96*results_table$std.error)
results_table$man_upper <- results_table$estimate + (1.96*results_table$std.error)

# Levels of t, so ggplot doesn't switch to alphabetical
results_table$Time <- as.character(results_table$Time)
results_table$Time <- factor(results_table$Time, levels=unique(results_table$Time))

# Add a zero for t-1, as Cunningham has and previous versions of PanelMatch had -- for ease of interpretation
results_table <- results_table %>% add_row(Time = "t-1",
                                           estimate = 0,
                                           .before = 1)

# Plot
results_plot_dem_placebo <- ggplot(results_table,
                                  aes(Time, estimate, group = 1))+
  geom_hline(yintercept=0) +
  geom_vline(
    aes(xintercept =  stage(Time, after_scale = 1.5)),
    linetype = "dashed") +
  geom_line()+
  geom_errorbar(width=.1, aes(ymin = lower, ymax = upper)) +
  geom_point(shape=21, size=3, fill = "white") +
  scale_y_continuous(limits = c(-0.135, 0.12),
                     breaks = seq(0.12, -0.125, by = -0.02),
                     labels = scales::number_format(accuracy = 0.01))+
  annotate("text", x = 2.98, y= 0.05, label = "Sanctions imposition", hjust = 1)+
  labs(y = "Estimated Difference in Democracy\n(V-Dem Polyarchy, 0-1)",
       x = element_blank()
  ) +
  theme_clean()

results_plot_dem_placebo

ggsave("99-Fig-A08.png", results_plot_dem_placebo, width = 7, height = 2.65, dpi = 300)



### Appendix 14: Placebo tests -- non-democracy-related sanctions onset
#   Figure A9
#   Outcome: HR

# @knitr app-placebo-hr

PM.results <- PanelMatch(lag = 4,                      
                         time.id = "year",
                         unit.id = "country_id",
                         treatment = "only_nondem_sanc_YN",
                         refinement.method = "CBPS.weight",
                         data = final_df5_90_21_b,
                         covs.formula = ~ 
                           I(lag(intra_war_increase_YN, 0:4))  +
                           #I(lag(v2cademmob, 1:4))          +
                           I(lag(dist_EU_US, 1:4))        ,  
                         #I(lag(v2x_polyarchy, 1:3))    +
                         #I(lag(theta_mean, 1:3))          ,
                         qoi = "att",
                         outcome.var = "theta_mean",          # HR outcome
                         lead = 0:3,                 
                         forbid.treatment.reversal = FALSE,
                         use.diagonal.variance.matrix = TRUE,
                         placebo.test = TRUE
)

PE.results <- PanelEstimate(sets = PM.results,
                            number.iterations = 1000,
                            data = final_df5_90_21_b)

# plot <- plot(PE.results)

# Check the balance above for the Dem estimate; these are identical

capture <- capture.output(results_table <- summary(PE.results))

results_table <- results_table[["summary"]]
results_table <- as.data.frame(results_table)
results_table <- tibble::rownames_to_column(results_table,     "Time")

results_table$lower <- results_table$`2.5%`
results_table$upper <- results_table$`97.5%`

# Manually calculate 95% CIs just to be safe and to double-check
results_table$man_lower <- results_table$estimate - (1.96*results_table$std.error)
results_table$man_upper <- results_table$estimate + (1.96*results_table$std.error)

# Levels of t, so ggplot doesn't switch to alphabetical
results_table$Time <- as.character(results_table$Time)
results_table$Time <- factor(results_table$Time, levels=unique(results_table$Time))

# Add a zero for t-1, as Cunningham has and previous versions of PanelMatch had -- for ease of interpretation
results_table <- results_table %>% add_row(Time = "t-1",
                                           estimate = 0,
                                           .before = 1)

# Plot
results_plot_hr_placebo <- ggplot(results_table,
                          aes(Time, estimate, group = 1))+
  geom_hline(yintercept=0) +
  geom_vline(
    aes(xintercept =  stage(Time, after_scale = 1.5)),
    linetype = "dashed") +
  geom_line()+
  geom_errorbar(width=.1, aes(ymin = lower, ymax = upper)) +
  geom_point(shape=21, size=3, fill = "white") +
  scale_y_continuous(limits = c(-0.5, 0.5),
                     breaks = seq(-0.5, 0.5, by = 0.1),
                     labels = scales::number_format(accuracy = 0.1))+
  annotate("text", x = 1.6, y= 0.25, label = "Sanctions imposition", hjust = 0)+
  labs(y = "Estimated Difference in Human Rights\n(HRS, -3.5-5.5)",
       x = element_blank()
  ) +
  theme_clean()

results_plot_hr_placebo

ggsave("99-Fig-A09.png", results_plot_hr_placebo, width = 7, height = 2.65, dpi = 300)





### Figure to describe the Burundi 2015 example
#   Illustration: Burundi 2015 -- Appendix A13, Figure A7

#   @knitr Fig-BDI-2015

df <- read_csv("final_df5_90_21_b.csv")
df$country <- countrycode(df$country_id, "vdem", "country.name")
# 128 is Palestine

df_lim <- df %>%
  group_by(country_id) %>%
  filter(country == "Burundi"    |
         country == "Azerbaijan" |
         country == "Nigeria"    |
         country == "Ethiopia") %>%
  ungroup()
  
Fig_BDI_2015 <- ggplot(df_lim,
       aes(year, v2x_polyarchy)) +
  geom_rect(aes(xmin = 2011,
                xmax = 2018, 
                ymin = -Inf,
                ymax = Inf),
            alpha = 0.01) +
  geom_line(aes(color = as.factor(Dem_Sanc_YN), group = country))+
  scale_color_manual(values = c("darkgrey", "black"),
                     labels = c("No dem./HR-related sanctions imposed", "Dem./HR-related sanctions imposed"))+
  labs(colour = "Treatment status")+
  scale_y_continuous(limits = c(0, 1),
                     breaks = seq(0, 1, by = 0.1))+
  scale_x_continuous(limits = c(1989, 2025.5),
                     breaks = seq(1990, 2024, by = 5))+
  geom_vline(xintercept = 2015,
             linetype = "dashed")+
  annotate("text", x = 2022, y= 0.150, label = "Burundi", hjust = 0, size = 3.5)+
  annotate("text", x = 2022, y= 0.196, label = "Azerbaijan", hjust = 0, size = 3.5)+
  annotate("text", x = 2022, y= 0.493, label = "Nigeria", hjust = 0, size = 3.5)+
  annotate("text", x = 2022, y= 0.289, label = "Ethiopia", hjust = 0, size = 3.5)+
  annotate("text", x = 2015.5, y= 0.05, label = "Sanctions imposed\non Burundi (2015)", hjust = 0, size = 3.5, lineheight = .75)+
  annotate("text", x = 2014.25, y= 0.65, label = "Lag window\n(L = 4; 2011-2015)", hjust = 1, size = 3.5, lineheight = .75)+
  annotate("text", x = 2015.75, y= 0.65, label = "Lead window\n(F = 3; 2015-2018)", hjust = 0, size = 3.5, lineheight = .75)+
  labs(y = "Democracy\n(V-Dem Polyarchy, 0-1)",
       x = element_blank())+
  theme_clean()+
  theme(legend.position = c(0.7, 0.85),
        legend.text=element_text(size = 10),
        legend.title=element_text(size = 10))

Fig_BDI_2015

ggsave("99-Fig-A07.png", Fig_BDI_2015, width = 7, height = 4, dpi = 300)





### Appendix A15: 1990--2004 and 2005--2021 subsets

## Table A10, Figure A10
## @knitr subsets-Dem-bal-90-04

set.seed(123)

final_df5_90_04_b <- read.csv("final_df5_90_04_b.csv")
final_df5_05_21_b <- read.csv("final_df5_05_21_b.csv")

# Dem for 1990-2004
# Smaller lead and lag to get more matches

PM.results <- PanelMatch(lag = 1,                                  # Identical treatment history
                         time.id = "year",
                         unit.id = "country_id",
                         treatment = "Dem_Sanc_YN",
                         refinement.method = "CBPS.weight",
                         data = final_df5_90_04_b,
                         covs.formula = ~ 
                           I(lag(t_events_count, 0:1))         +
                           I(lag(intra_war_increase_YN, 0:1))  +
                           I(lag(v2cademmob, 1:1))          +
                           I(lag(dist_EU_US, 1:1))        ,  
                         qoi = "att",
                         outcome.var = "v2x_polyarchy",
                         lead = 0:3,                          # Two on each "side" of the treatment
                         forbid.treatment.reversal = FALSE,
                         use.diagonal.variance.matrix = TRUE,
                         placebo.test = TRUE
)

# Check balance
balance <- get_covariate_balance(PM.results$att,
                                 data = final_df5_90_04_b,
                                 covariates = c(
                                   "t_events_count",  # Only at 0 and 1
                                   "intra_war_increase_YN",
                                   "v2cademmob",
                                   #"GDPg",
                                   "dist_EU_US"
                                   #"v2x_polyarchy",   # Outcome variable of interest
                                   #"theta_mean"       # Outcome variable of interest
                                 ),                         ## Error: unused argument (simplify = TRUE)
                                 ylab = "SD",
                                 plot = F)

# A bit of naming and formatting
balance     <- as_tibble(balance)
balance     <- tibble::rownames_to_column(balance,     "Time")

balance$Time <- ifelse(balance$Time == "1", "t--1", balance$Time)
balance$Time <- ifelse(balance$Time == "2", "t",    balance$Time)

write_csv(balance, "balance_dem_90_04.csv")

balance_dem_90_04 <- balance %>%      # Is identical to Dem outcome table
  mutate_all(linebreak) %>%
  knitr::kable(caption = "Weighted/refined/balanced control groups: Covariate balance from t--1 to t, outcome: democracy, 1990--2004.",
               booktabs = T,
               escape   = F,
               digits   = 2,
               align = ('lccccc'),
               col.names = c("Time",
                             "Trigger Events",
                             "Intrastate war increase",
                             "Pro-dem. protest",
                             "Pol. proximity"
               )) %>%
  kable_styling(font_size = 9) %>%
  add_footnote(c("Trigger events and conflict increases used for weighting at t--1 to t0, all others at t--1.",
                 "Standardized mean differences, in SDs."), notation="none")

balance_dem_90_04

cat(balance_dem_90_04, file = "99-Tab-A10.tex")
cat(balance_dem_90_04, file = "99-Tab-A10.html")

## @knitr subsets-Dem-90-04

# Results
PE.results <- PanelEstimate(sets = PM.results,
                            number.iterations = 1000,
                            data = final_df5_90_04_b)

#PE.results[["estimates"]]
#plot(PE.results)
#summary(PE.results)

capture <- capture.output(results_table <- summary(PE.results))

results_table <- results_table[["summary"]]
results_table <- as.data.frame(results_table)
results_table <- tibble::rownames_to_column(results_table,     "Time")

results_table$lower <- results_table$`2.5%`
results_table$upper <- results_table$`97.5%`

# Manually calculate 95% CIs just to be safe and to double-check
results_table$man_lower <- results_table$estimate - (1.96*results_table$std.error)
results_table$man_upper <- results_table$estimate + (1.96*results_table$std.error)

# Levels of t, so ggplot doesn't switch to alphabetical
results_table$Time <- as.character(results_table$Time)
results_table$Time <- factor(results_table$Time, levels=unique(results_table$Time))

# Add a zero for t-1, as Cunningham has and previous versions of PanelMatch had -- for ease of interpretation
results_table <- results_table %>% add_row(Time = "t-1",
                                           estimate = 0,
                                           .before = 1)

# Plot
results_plot_dem_90_04 <- ggplot(results_table,
                          aes(Time, estimate, group = 1))+
  geom_hline(yintercept=0) +
  geom_vline(
    aes(xintercept =  stage(Time, after_scale = 1.5)),
    linetype = "dashed") +
  geom_line()+
  geom_errorbar(width=.1, aes(ymin = lower, ymax = upper)) +
  geom_point(shape=21, size=3, fill = "white") +
  scale_y_continuous(limits = c(-0.158, 0.12),
                     breaks = seq(-0.14, 0.12, by = 0.02),
                     labels = scales::number_format(accuracy = 0.01))+
  annotate("text", x = 2, y= 0.07, label = "Sanctions imposition", hjust = 0)+
  labs(y = "Estimated Difference in Democracy\n(V-Dem Polyarchy, 0-1)",
       x = element_blank()
  ) +
  theme_clean()

results_plot_dem_90_04

ggsave("99-Fig-A10.png", results_plot_dem_90_04, width = 7, height = 2.45, dpi = 300)





### Appendix A15: 1990--2004 and 2005--2021 subsets

## Table A11, Figure A11
## @knitr subsets-Dem-bal-05-21

# Dem for 2005-2021
# Smaller lead and lag to get more matches

PM.results <- PanelMatch(lag = 1,                                  # Identical treatment history
                         time.id = "year",
                         unit.id = "country_id",
                         treatment = "Dem_Sanc_YN",
                         refinement.method = "CBPS.weight",
                         data = final_df5_05_21_b,
                         covs.formula = ~ 
                           I(lag(t_events_count, 0:1))         +
                           I(lag(intra_war_increase_YN, 0:1))  +
                           I(lag(v2cademmob, 1:1))          +
                           I(lag(dist_EU_US, 1:1))        ,  
                         qoi = "att",
                         outcome.var = "v2x_polyarchy",
                         lead = 0:3,               
                         forbid.treatment.reversal = FALSE,
                         use.diagonal.variance.matrix = TRUE,
                         placebo.test = TRUE
)

# Check balance
balance <- get_covariate_balance(PM.results$att,
                                 data = final_df5_05_21_b,
                                 covariates = c(
                                   "t_events_count",  
                                   "intra_war_increase_YN",
                                   "v2cademmob",
                                   "dist_EU_US"
                                   #"v2x_polyarchy",   # Outcome variable of interest
                                   #"theta_mean"       # Outcome variable of interest
                                 ),                         ## Error: unused argument (simplify = TRUE)
                                 ylab = "SD",
                                 plot = F)

# A bit of naming and formatting
balance     <- as_tibble(balance)
balance     <- tibble::rownames_to_column(balance,     "Time")

balance$Time <- ifelse(balance$Time == "1", "t--1", balance$Time)
balance$Time <- ifelse(balance$Time == "2", "t",    balance$Time)

write_csv(balance, "balance_dem_05_21.csv")

balance_dem_05_21 <- balance %>%      # Is identical to Dem outcome table
  mutate_all(linebreak) %>%
  knitr::kable(caption = "Weighted/refined/balanced control groups: Covariate balance from t--1 to t, outcome: democracy, 2005--2021.",
               booktabs = T,
               escape   = F,
               digits   = 2,
               align = ('lccccc'),
               col.names = c("Time",
                             "Trigger Events",
                             "Intrastate war increase",
                             "Pro-dem. protest",
                             "Pol. proximity"
               )) %>%
  kable_styling(font_size = 9) %>%
  add_footnote(c("Trigger events and conflict increases used for weighting at t--1 to t0, all others at t--1.",
                 "Standardized mean differences, in SDs."), notation="none")

balance_dem_05_21

cat(balance_dem_05_21, file = "99-Tab-A11.tex")
cat(balance_dem_05_21, file = "99-Tab-A11.html")

## @knitr subsets-Dem-05-21

# Results
PE.results <- PanelEstimate(sets = PM.results,
                            number.iterations = 1000,
                            data = final_df5_05_21_b)

#PE.results[["estimates"]]
#plot(PE.results)
#summary(PE.results)

capture <- capture.output(results_table <- summary(PE.results))

results_table <- results_table[["summary"]]
results_table <- as.data.frame(results_table)
results_table <- tibble::rownames_to_column(results_table,     "Time")

results_table$lower <- results_table$`2.5%`
results_table$upper <- results_table$`97.5%`

# Manually calculate 95% CIs just to be safe and to double-check
results_table$man_lower <- results_table$estimate - (1.96*results_table$std.error)
results_table$man_upper <- results_table$estimate + (1.96*results_table$std.error)



# Levels of t, so ggplot doesn't switch to alphabetical
results_table$Time <- as.character(results_table$Time)
results_table$Time <- factor(results_table$Time, levels=unique(results_table$Time))

# Add a zero for t-1, as Cunningham has and previous versions of PanelMatch had -- for ease of interpretation
results_table <- results_table %>% add_row(Time = "t-1",
                                           estimate = 0,
                                           .before = 1)

# Plot
results_plot_dem_05_21 <- ggplot(results_table,
                                 aes(Time, estimate, group = 1))+
  geom_hline(yintercept=0) +
  geom_vline(
    aes(xintercept =  stage(Time, after_scale = 1.5)),
    linetype = "dashed") +
  geom_line()+
  geom_errorbar(width=.1, aes(ymin = lower, ymax = upper)) +
  geom_point(shape=21, size=3, fill = "white") +
  scale_y_continuous(limits = c(-0.158, 0.12),
                     breaks = seq(-0.14, 0.12, by = 0.02),
                     labels = scales::number_format(accuracy = 0.01))+
  annotate("text", x = 2, y= 0.07, label = "Sanctions imposition", hjust = 0)+
  labs(y = "Estimated Difference in Democracy\n(V-Dem Polyarchy, 0-1)",
       x = element_blank()
  ) +
  theme_clean()

results_plot_dem_05_21

ggsave("99-Fig-A11.png", results_plot_dem_05_21, width = 7, height = 2.45, dpi = 300)






### Appendix A5n -- cfedible threats

# @knitr Tab-credible
# Table A3

# Threats and imposition tables
# Threat data only available in EUSANCT

# Open C-Y dataset and EUSANCT cases
EUSANCT_Case_level_Dataset     <- read_csv("EUSANCT_Case-level-Dataset.csv")

EUSANCT_Case_level_Dataset <- EUSANCT_Case_level_Dataset %>%
  filter(!grepl("ICC", comments))

EUSANCT_Case_level_Dataset$dem_sanc <- 0
EUSANCT_Case_level_Dataset$dem_sanc[EUSANCT_Case_level_Dataset$issue1 == 12 |
                                    EUSANCT_Case_level_Dataset$issue1 == 13 |
                                    EUSANCT_Case_level_Dataset$issue2 == 12 |
                                    EUSANCT_Case_level_Dataset$issue2 == 13 |
                                    EUSANCT_Case_level_Dataset$issue3 == 12 |
                                    EUSANCT_Case_level_Dataset$issue3 == 13 ] <- "Democracy"

EUSANCT_Case_level_Dataset$hr_sanc <- 0
EUSANCT_Case_level_Dataset$hr_sanc[EUSANCT_Case_level_Dataset$issue1 == 8 |
                                   EUSANCT_Case_level_Dataset$issue2 == 8 |
                                   EUSANCT_Case_level_Dataset$issue3 == 8 ] <- "Human rights"

EUSANCT_Case_level_Dataset$demhr_sanc <- EUSANCT_Case_level_Dataset$dem_sanc

EUSANCT_Case_level_Dataset$demhr_sanc <- ifelse(EUSANCT_Case_level_Dataset$hr_sanc == "Human rights",
                                                EUSANCT_Case_level_Dataset$hr_sanc,
                                                EUSANCT_Case_level_Dataset$dem_sanc)

EUSANCT_Case_level_Dataset$demhr_sanc <- ifelse((EUSANCT_Case_level_Dataset$dem_sanc == "Democracy" &
                                                 EUSANCT_Case_level_Dataset$hr_sanc == "Human rights"),
                                                "Dem. and HR",
                                                EUSANCT_Case_level_Dataset$demhr_sanc)

EUSANCT_Case_level_Dataset$credible <- NA
EUSANCT_Case_level_Dataset$credible[EUSANCT_Case_level_Dataset$threat         == 1 &
                                      EUSANCT_Case_level_Dataset$threat_success == 0 &
                                      EUSANCT_Case_level_Dataset$sanction       == 1] <- 1

EUSANCT_Case_level_Dataset$follow_up <- NA
EUSANCT_Case_level_Dataset$follow_up[EUSANCT_Case_level_Dataset$threat         == 1 &
                                       EUSANCT_Case_level_Dataset$threat_success == 0] <- 1

# Re: above -- codebook says issue numbers are used for "otherissue" var, this is not the case?

# Give year variable for earliest year
EUSANCT_Case_level_Dataset$year <- substr(EUSANCT_Case_level_Dataset$caseid, 1, 4)

# COWN country year
EUSANCT_Case_level_Dataset$cname <- countrycode(EUSANCT_Case_level_Dataset$targetstate,
                                                origin = "cown",
                                                destination = "country.name")

EUSANCT_Case_level_Dataset$cow_year <- paste(EUSANCT_Case_level_Dataset$targetstate,
                                             EUSANCT_Case_level_Dataset$year,
                                             sep = "_"  )

# Create the table

threat_table2 <- EUSANCT_Case_level_Dataset %>%
  group_by(demhr_sanc) %>%
  summarize(threat_sum = sum(threat, na.rm=TRUE),
            threat_successful = sum(threat_success, na.rm=TRUE),
            threat_successful_share = (threat_successful/threat_sum),
            credible_sum = sum(credible, na.rm=TRUE),
            follow_up_sum = sum(follow_up, na.rm = TRUE),
            credible_share = (credible_sum/follow_up_sum),
            not_credible_share = 1-credible_share)

threat_table2 <- subset(threat_table2, select = -credible_sum)
threat_table2 <- subset(threat_table2, select = -follow_up_sum)

# Make a table
# Display certain columns as %
# Function to display as percentage
percent <- function(x, digits = 1, format = "f", ...) {
  paste0(formatC(x * 100, format = format, digits = digits, ...), "%")
}

# Change display
threat_table2$threat_successful_share  <- percent(threat_table2$threat_successful_share)
threat_table2$credible_share           <- percent(threat_table2$credible_share)
threat_table2$not_credible_share       <- percent(threat_table2$not_credible_share)

threat_table2 <- threat_table2 %>%
  slice(c(1, 3, 4, 2))

colnames(threat_table2) <- c("Objective", "Threats", "Sum", "Share", "Share", "Share")

threat_table2[1, 1] = "Other issues"

# Knit
tab_credible <- knitr::kable(
  head(threat_table2, 21),
  longtable = FALSE,
  booktabs = TRUE,
  digits = 3,
  align=('lccccc'),
  caption = 'The success of sanctions threats by objective, 1989--2015.'
) %>%
  add_header_above(c(" " = 2, "Threat successful" = 2, "Threat credible" = 1, "Threat not credible" = 1)) %>%
  kable_styling(latex_options = "HOLD_position") %>%
  add_footnote("US ICC-related sanctions removed.", notation = "none")

tab_credible

cat(tab_credible, file = "99-Tab-A03.tex")
cat(tab_credible, file = "99-Tab-A03.html")






### Appendix A6 -- success rates of imposed sanctions by aim

# Table A4
# @knitr Tab-success-1

success_table_split <- EUSANCT_Case_level_Dataset %>%
  filter(sanction == TRUE) %>%
  filter(demhr_sanc != 0)    %>%
  group_by(demhr_sanc) %>%
  summarize(success_sum  = sum(sanctions_success, na.rm=TRUE),
            count        = n(),
            success_rate = success_sum/count)

success_table_all <- EUSANCT_Case_level_Dataset %>%
  filter(sanction == TRUE) %>%
  filter(demhr_sanc != 0)    %>%
  summarize(success_sum  = sum(sanctions_success, na.rm=TRUE),
            count        = n(),
            success_rate = success_sum/count)

success_table_all$demhr_sanc <- "Total (1989--2015)"

# Pre-2005 overall success
success_table_all_pre2005 <- EUSANCT_Case_level_Dataset %>%
  filter(sanction == TRUE) %>%
  filter(demhr_sanc != 0) %>%
  filter(year < 2005) %>%
  summarize(success_sum  = sum(sanctions_success, na.rm = TRUE),
            count        = n(),
            success_rate = success_sum / count)

success_table_all_pre2005$demhr_sanc <- "Total (1989--2004)"

# Post-2005 overall success
success_table_all_post2005 <- EUSANCT_Case_level_Dataset %>%
  filter(sanction == TRUE) %>%
  filter(demhr_sanc != 0) %>%
  filter(year >= 2005) %>%
  summarize(success_sum  = sum(sanctions_success, na.rm = TRUE),
            count        = n(),
            success_rate = success_sum / count)

success_table_all_post2005$demhr_sanc <- "Total (2005--2015)"

success_table <- rbind(success_table_split,
                       success_table_all,
                       success_table_all_pre2005,
                       success_table_all_post2005)

success_table <- success_table %>%
  slice(c(1, 3, 2, 5, 6, 4))

colnames(success_table) <- c("", "Successes", "Impositions", "Success rate")

success_table$`Success rate` <- percent(success_table$`Success rate`)

# Knit
Tab_A04 <- knitr::kable(
  success_table,
  longtable = FALSE,
  booktabs = TRUE,
  digits = 3,
  align=('lccccc'),
  caption = 'The success of imposed sanctions by objective, 1989--2015 (EUSANCT).'
 ) %>%
  kable_styling(latex_options = "HOLD_position") %>%
  row_spec(3, hline_after = TRUE) %>%
  row_spec(5, hline_after = TRUE)  

Tab_A04

cat(Tab_A04, file = "99-Tab-A04.tex")
cat(Tab_A04, file = "99-Tab-A04.html")






### Appendix A16 -- Subset analyses for democracy and human rights

# Figure A12
# @knitr Fig-sdod
# Notation for the knitr tags in the following: 's' for sanctions aim, 'o' for outcome measure, 'd' for democracy, 'h' for human rights

set.seed(123)

final_df5_90_21 <- read.csv("final_df5_90_21_b.csv")

PM.results <- PanelMatch(lag = 4,                                  # Identical treatment history
                         time.id = "year",
                         unit.id = "country_id",
                         treatment = "DM_YN",
                         refinement.method = "CBPS.weight",
                         data = final_df5_90_21,
                         covs.formula = ~ 
                           I(lag(t_events_count, 0:4))         +
                           I(lag(intra_war_increase_YN, 0:4))  +
                           I(lag(v2cademmob, 1:4))          +
                           I(lag(dist_EU_US, 1:4))        ,  
                         qoi = "att",
                         outcome.var = "v2x_polyarchy",
                         lead = 0:3,                          # Three on each "side" of the treatment
                         forbid.treatment.reversal = FALSE,
                         use.diagonal.variance.matrix = TRUE,
                         placebo.test = TRUE
)

# Check balance
balance <- get_covariate_balance(PM.results$att,
                                 data = final_df5_90_21,
                                 covariates = c(
                                   "t_events_count",  # Only at 0 and 1
                                   "intra_war_increase_YN",
                                   "v2cademmob",
                                   "dist_EU_US"
                                 ), 
                                 ylab = "SD",
                                 plot = F)

# Won't print the balance table for the the appendix -- the balance looks good

# Results
PE.results <- PanelEstimate(sets = PM.results,
                            number.iterations = 1000,
                            data = final_df5_90_21)

#PE.results[["estimates"]]
#plot(PE.results)
#summary(PE.results)

capture <- capture.output(results_table <- summary(PE.results))

results_table <- results_table[["summary"]]
results_table <- as.data.frame(results_table)
results_table <- tibble::rownames_to_column(results_table,     "Time")

results_table$lower <- results_table$`2.5%`
results_table$upper <- results_table$`97.5%`

# Manually calculate 95% CIs just to be safe and to double-check
results_table$man_lower <- results_table$estimate - (1.96*results_table$std.error)
results_table$man_upper <- results_table$estimate + (1.96*results_table$std.error)

# Levels of t, so ggplot doesn't switch to alphabetical
results_table$Time <- as.character(results_table$Time)
results_table$Time <- factor(results_table$Time, levels=unique(results_table$Time))

# Add a zero for t-1, as Cunningham has and previous versions of PanelMatch had -- for ease of interpretation
results_table <- results_table %>% add_row(Time = "t-1",
                                           estimate = 0,
                                           .before = 1)

### Placebo tests -- DM only

set.seed(123)

placebo_tab <- placebo_test(PM.results, data = final_df5_90_21, number.iterations = 1000, plot = F, confidence.level = 0.95)

dm_estimates <- placebo_tab[["bootstrapped.estimates"]]
dm_estimates <- as.data.frame(dm_estimates)

dm_estimates_mean_t4 <- mean(dm_estimates$`t-4`)
dm_estimates_mean_t3 <- mean(dm_estimates$`t-3`)
dm_estimates_mean_t2 <- mean(dm_estimates$`t-2`)

# https://github.com/insongkim/PanelMatch/issues/132#issuecomment-1795026977
# install_github("insongkim/PanelMatch", dependencies=TRUE, ref = "se_comparison")

pt <- placebo_test(PM.results, data = final_df5_90_21, se.method = "unconditional", plot = FALSE)

ses_past <- pt[["standard.errors"]]

# Create table
dm_past_estimates <- data.frame(
  Time = c("t-4", "t-3", "t-2"),
  estimate = c(dm_estimates_mean_t4, dm_estimates_mean_t3, dm_estimates_mean_t2),
  std.error = ses_past,
  `2.5%` = NA,
  `97.5%` = NA,
  lower = NA,
  upper = NA,
  man_lower = NA,
  man_upper = NA
)

# Manually calculate 95% CIs just to be safe and to double-check, as above --

dm_past_estimates$man_lower <- dm_past_estimates$estimate - (1.96*dm_past_estimates$std.error)
dm_past_estimates$man_upper <- dm_past_estimates$estimate + (1.96*dm_past_estimates$std.error)

# These indeed seem to be identical to those in the figure

dm_past_estimates$lower <- dm_past_estimates$man_lower
dm_past_estimates$upper <- dm_past_estimates$man_upper

# Merge
# Add these to the t+1, t+2,... estimates table created above

estimates_full <- rbind(dm_past_estimates, setNames(results_table, names(dm_past_estimates)))

estimates_full$Time <- factor(estimates_full$Time, levels = c("t-4", "t-3", "t-2", "t-1", "t+0", "t+1", "t+2", "t+3"))

# Plot the whole thing
results_plot_sdod <- ggplot(estimates_full,
                          aes(Time, estimate, group = 1))+
  geom_hline(yintercept=0) +
  geom_vline(
    aes(xintercept =  stage(Time, after_scale = 4.5)),
    linetype = "dashed") +
  geom_line()+
  geom_errorbar(width=.1, aes(ymin = lower, ymax = upper)) +
  geom_point(shape=21, size=3, fill = "white") +
  scale_y_continuous(limits = c(-0.2, 0.1),
                     breaks = seq(-0.2, 0.1, by = 0.05),
                     labels = scales::number_format(accuracy = 0.01))+
  annotate("text", x = 4.35, y= 0.075, label = "Sanctions imposition", hjust = 1)+
  labs(y = "Estimated Difference in Democracy\n(V-Dem, 0--1)",
       x = element_blank()
  ) +
  theme_clean()

results_plot_sdod

ggsave("99-Fig-A12.png", results_plot_sdod, width = 7, height = 2.75, dpi = 300)




# Figure A14
# @knitr Fig-shod

set.seed(123)

final_df5_90_21 <- read.csv("final_df5_90_21_b.csv")

PM.results <- PanelMatch(lag = 4,                                  # Identical treatment history
                         time.id = "year",
                         unit.id = "country_id",
                         treatment = "HR_YN",
                         refinement.method = "CBPS.weight",
                         data = final_df5_90_21,
                         covs.formula = ~ 
                           I(lag(t_events_count, 0:4))         +
                           I(lag(intra_war_increase_YN, 0:4))  +
                           I(lag(v2cademmob, 1:4))          +
                           I(lag(dist_EU_US, 1:4))        ,  
                         qoi = "att",
                         outcome.var = "v2x_polyarchy",
                         lead = 0:3,                          # Three on each "side" of the treatment
                         forbid.treatment.reversal = FALSE,
                         use.diagonal.variance.matrix = TRUE,
                         placebo.test = TRUE
)

# Check balance
balance <- get_covariate_balance(PM.results$att,
                                 data = final_df5_90_21,
                                 covariates = c(
                                   "t_events_count",  # Only at 0 and 1
                                   "intra_war_increase_YN",
                                   "v2cademmob",
                                   "dist_EU_US"
                                 ), 
                                 ylab = "SD",
                                 plot = F)

# Won't print the balance table for the the appendix -- the balance looks good

# Results
PE.results <- PanelEstimate(sets = PM.results,
                            number.iterations = 1000,
                            data = final_df5_90_21)

#PE.results[["estimates"]]
#plot(PE.results)
#summary(PE.results)

capture <- capture.output(results_table <- summary(PE.results))

results_table <- results_table[["summary"]]
results_table <- as.data.frame(results_table)
results_table <- tibble::rownames_to_column(results_table,     "Time")

results_table$lower <- results_table$`2.5%`
results_table$upper <- results_table$`97.5%`

# Manually calculate 95% CIs just to be safe and to double-check
results_table$man_lower <- results_table$estimate - (1.96*results_table$std.error)
results_table$man_upper <- results_table$estimate + (1.96*results_table$std.error)

# Levels of t, so ggplot doesn't switch to alphabetical
results_table$Time <- as.character(results_table$Time)
results_table$Time <- factor(results_table$Time, levels=unique(results_table$Time))

# Add a zero for t-1, as Cunningham has and previous versions of PanelMatch had -- for ease of interpretation
results_table <- results_table %>% add_row(Time = "t-1",
                                           estimate = 0,
                                           .before = 1)

### Placebo tests -- HR only

set.seed(123)

placebo_tab <- placebo_test(PM.results, data = final_df5_90_21, number.iterations = 1000, plot = F, confidence.level = 0.95)

hr_estimates <- placebo_tab[["bootstrapped.estimates"]]
hr_estimates <- as.data.frame(hr_estimates)

hr_estimates_mean_t4 <- mean(hr_estimates$`t-4`)
hr_estimates_mean_t3 <- mean(hr_estimates$`t-3`)
hr_estimates_mean_t2 <- mean(hr_estimates$`t-2`)

# https://github.com/insongkim/PanelMatch/issues/132#issuecomment-1795026977
# install_github("insongkim/PanelMatch", dependencies=TRUE, ref = "se_comparison")

pt <- placebo_test(PM.results, data = final_df5_90_21, se.method = "unconditional", plot = FALSE)

ses_past <- pt[["standard.errors"]]

# Create table
hr_past_estimates <- data.frame(
  Time = c("t-4", "t-3", "t-2"),
  estimate = c(hr_estimates_mean_t4, hr_estimates_mean_t3, hr_estimates_mean_t2),
  std.error = ses_past,
  `2.5%` = NA,
  `97.5%` = NA,
  lower = NA,
  upper = NA,
  man_lower = NA,
  man_upper = NA
)

# Manually calculate 95% CIs just to be safe and to double-check, as above --

hr_past_estimates$man_lower <- hr_past_estimates$estimate - (1.96*hr_past_estimates$std.error)
hr_past_estimates$man_upper <- hr_past_estimates$estimate + (1.96*hr_past_estimates$std.error)

# These indeed seem to be identical to those in the figure

hr_past_estimates$lower <- hr_past_estimates$man_lower
hr_past_estimates$upper <- hr_past_estimates$man_upper

# Merge
# Add these to the t+1, t+2,... estimates table created above

estimates_full <- rbind(hr_past_estimates, setNames(results_table, names(hr_past_estimates)))

estimates_full$Time <- factor(estimates_full$Time, levels = c("t-4", "t-3", "t-2", "t-1", "t+0", "t+1", "t+2", "t+3"))

# Plot the whole thing
results_plot_shod <- ggplot(estimates_full,
                          aes(Time, estimate, group = 1))+
  geom_hline(yintercept=0) +
  geom_vline(
    aes(xintercept =  stage(Time, after_scale = 4.5)),
    linetype = "dashed") +
  geom_line()+
  geom_errorbar(width=.1, aes(ymin = lower, ymax = upper)) +
  geom_point(shape=21, size=3, fill = "white") +
  scale_y_continuous(limits = c(-0.2, 0.1),
                     breaks = seq(-0.2, 0.1, by = 0.05),
                     labels = scales::number_format(accuracy = 0.01))+
  annotate("text", x = 4.35, y= 0.075, label = "Sanctions imposition", hjust = 1)+
  labs(y = "Estimated Difference in Democracy\n(V-Dem, 0--1)",
       x = element_blank()
  ) +
  theme_clean()

results_plot_shod

ggsave("99-Fig-A14.png", results_plot_shod, width = 7, height = 2.75, dpi = 300)



# Figure A13
# @knitr Fig-sdoh

set.seed(123)

final_df5_90_21 <- read.csv("final_df5_90_21_b.csv")

PM.results <- PanelMatch(lag = 4,                                  # Identical treatment history
                         time.id = "year",
                         unit.id = "country_id",
                         treatment = "DM_YN",
                         refinement.method = "CBPS.weight",
                         data = final_df5_90_21,
                         covs.formula = ~ 
                           I(lag(t_events_count, 0:4))         +
                           I(lag(intra_war_increase_YN, 0:4))  +
                           I(lag(v2cademmob, 1:4))          +
                           I(lag(dist_EU_US, 1:4))        ,  
                         qoi = "att",
                         outcome.var = "theta_mean",
                         lead = 0:3,                          # Three on each "side" of the treatment
                         forbid.treatment.reversal = FALSE,
                         use.diagonal.variance.matrix = TRUE,
                         placebo.test = TRUE
)

# Check balance
balance <- get_covariate_balance(PM.results$att,
                                 data = final_df5_90_21,
                                 covariates = c(
                                   "t_events_count",  # Only at 0 and 1
                                   "intra_war_increase_YN",
                                   "v2cademmob",
                                   "dist_EU_US"
                                 ), 
                                 ylab = "SD",
                                 plot = F)

# Won't print the balance table for the the appendix -- the balance looks good

# Results
PE.results <- PanelEstimate(sets = PM.results,
                            number.iterations = 1000,
                            data = final_df5_90_21)

#PE.results[["estimates"]]
#plot(PE.results)
#summary(PE.results)

capture <- capture.output(results_table <- summary(PE.results))

results_table <- results_table[["summary"]]
results_table <- as.data.frame(results_table)
results_table <- tibble::rownames_to_column(results_table,     "Time")

results_table$lower <- results_table$`2.5%`
results_table$upper <- results_table$`97.5%`

# Manually calculate 95% CIs just to be safe and to double-check
results_table$man_lower <- results_table$estimate - (1.96*results_table$std.error)
results_table$man_upper <- results_table$estimate + (1.96*results_table$std.error)

# Levels of t, so ggplot doesn't switch to alphabetical
results_table$Time <- as.character(results_table$Time)
results_table$Time <- factor(results_table$Time, levels=unique(results_table$Time))

# Add a zero for t-1, as Cunningham has and previous versions of PanelMatch had -- for ease of interpretation
results_table <- results_table %>% add_row(Time = "t-1",
                                           estimate = 0,
                                           .before = 1)

### Placebo tests -- DM only

set.seed(123)

placebo_tab <- placebo_test(PM.results, data = final_df5_90_21, number.iterations = 1000, plot = F, confidence.level = 0.95)

dm_estimates <- placebo_tab[["bootstrapped.estimates"]]
dm_estimates <- as.data.frame(dm_estimates)

dm_estimates_mean_t4 <- mean(dm_estimates$`t-4`)
dm_estimates_mean_t3 <- mean(dm_estimates$`t-3`)
dm_estimates_mean_t2 <- mean(dm_estimates$`t-2`)

# https://github.com/insongkim/PanelMatch/issues/132#issuecomment-1795026977
# install_github("insongkim/PanelMatch", dependencies=TRUE, ref = "se_comparison")

pt <- placebo_test(PM.results, data = final_df5_90_21, se.method = "unconditional", plot = FALSE)

ses_past <- pt[["standard.errors"]]

# Create table
dm_past_estimates <- data.frame(
  Time = c("t-4", "t-3", "t-2"),
  estimate = c(dm_estimates_mean_t4, dm_estimates_mean_t3, dm_estimates_mean_t2),
  std.error = ses_past,
  `2.5%` = NA,
  `97.5%` = NA,
  lower = NA,
  upper = NA,
  man_lower = NA,
  man_upper = NA
)

# Manually calculate 95% CIs just to be safe and to double-check, as above --

dm_past_estimates$man_lower <- dm_past_estimates$estimate - (1.96*dm_past_estimates$std.error)
dm_past_estimates$man_upper <- dm_past_estimates$estimate + (1.96*dm_past_estimates$std.error)

# These indeed seem to be identical to those in the figure

dm_past_estimates$lower <- dm_past_estimates$man_lower
dm_past_estimates$upper <- dm_past_estimates$man_upper

# Merge
# Add these to the t+1, t+2,... estimates table created above

estimates_full <- rbind(dm_past_estimates, setNames(results_table, names(dm_past_estimates)))

estimates_full$Time <- factor(estimates_full$Time, levels = c("t-4", "t-3", "t-2", "t-1", "t+0", "t+1", "t+2", "t+3"))

# Plot the whole thing
results_plot_sdoh <- ggplot(estimates_full,
                              aes(Time, estimate, group = 1))+
  geom_hline(yintercept=0) +
  geom_vline(
    aes(xintercept =  stage(Time, after_scale = 4.5)),
    linetype = "dashed") +
  geom_line()+
  geom_errorbar(width=.1, aes(ymin = lower, ymax = upper)) +
  geom_point(shape=21, size=3, fill = "white") +
  scale_y_continuous(limits = c(-0.5, 0.5),
                     breaks = seq(-0.5, 0.5, by = 0.1),
                     labels = scales::number_format(accuracy = 0.1))+
  annotate("text", x = 4.35, y= 0.25, label = "Sanctions imposition", hjust = 1)+
  labs(y = "Estimated Difference in Human Rights\n(HRS, -3.5-5.5)",
       x = element_blank()
  ) +
  theme_clean()

results_plot_sdoh

ggsave("99-Fig-A13.png", results_plot_sdoh, width = 7, height = 2.75, dpi = 300)



# Figure A15
# @knitr Fig-shoh

set.seed(123)

final_df5_90_21 <- read.csv("final_df5_90_21_b.csv")

PM.results <- PanelMatch(lag = 4,                                  # Identical treatment history
                         time.id = "year",
                         unit.id = "country_id",
                         treatment = "HR_YN",
                         refinement.method = "CBPS.weight",
                         data = final_df5_90_21,
                         covs.formula = ~ 
                           I(lag(t_events_count, 0:4))         +
                           I(lag(intra_war_increase_YN, 0:4))  +
                           I(lag(v2cademmob, 1:4))          +
                           I(lag(dist_EU_US, 1:4))        ,  
                         qoi = "att",
                         outcome.var = "theta_mean",
                         lead = 0:3,                          # Three on each "side" of the treatment
                         forbid.treatment.reversal = FALSE,
                         use.diagonal.variance.matrix = TRUE,
                         placebo.test = TRUE
)

# Check balance
balance <- get_covariate_balance(PM.results$att,
                                 data = final_df5_90_21,
                                 covariates = c(
                                   "t_events_count",  # Only at 0 and 1
                                   "intra_war_increase_YN",
                                   "v2cademmob",
                                   "dist_EU_US"
                                 ), 
                                 ylab = "SD",
                                 plot = F)

# Won't print the balance table for the the appendix -- the balance looks good

# Results
PE.results <- PanelEstimate(sets = PM.results,
                            number.iterations = 1000,
                            data = final_df5_90_21)

#PE.results[["estimates"]]
#plot(PE.results)
#summary(PE.results)

capture <- capture.output(results_table <- summary(PE.results))

results_table <- results_table[["summary"]]
results_table <- as.data.frame(results_table)
results_table <- tibble::rownames_to_column(results_table,     "Time")

results_table$lower <- results_table$`2.5%`
results_table$upper <- results_table$`97.5%`

# Manually calculate 95% CIs just to be safe and to double-check
results_table$man_lower <- results_table$estimate - (1.96*results_table$std.error)
results_table$man_upper <- results_table$estimate + (1.96*results_table$std.error)

# Levels of t, so ggplot doesn't switch to alphabetical
results_table$Time <- as.character(results_table$Time)
results_table$Time <- factor(results_table$Time, levels=unique(results_table$Time))

# Add a zero for t-1, as Cunningham has and previous versions of PanelMatch had -- for ease of interpretation
results_table <- results_table %>% add_row(Time = "t-1",
                                           estimate = 0,
                                           .before = 1)

### Placebo tests -- HR only

set.seed(123)

placebo_tab <- placebo_test(PM.results, data = final_df5_90_21, number.iterations = 1000, plot = F, confidence.level = 0.95)

hr_estimates <- placebo_tab[["bootstrapped.estimates"]]
hr_estimates <- as.data.frame(hr_estimates)

hr_estimates_mean_t4 <- mean(hr_estimates$`t-4`)
hr_estimates_mean_t3 <- mean(hr_estimates$`t-3`)
hr_estimates_mean_t2 <- mean(hr_estimates$`t-2`)

# https://github.com/insongkim/PanelMatch/issues/132#issuecomment-1795026977
# install_github("insongkim/PanelMatch", dependencies=TRUE, ref = "se_comparison")

pt <- placebo_test(PM.results, data = final_df5_90_21, se.method = "unconditional", plot = FALSE)

ses_past <- pt[["standard.errors"]]

# Create table
hr_past_estimates <- data.frame(
  Time = c("t-4", "t-3", "t-2"),
  estimate = c(hr_estimates_mean_t4, hr_estimates_mean_t3, hr_estimates_mean_t2),
  std.error = ses_past,
  `2.5%` = NA,
  `97.5%` = NA,
  lower = NA,
  upper = NA,
  man_lower = NA,
  man_upper = NA
)

# Manually calculate 95% CIs just to be safe and to double-check, as above --

hr_past_estimates$man_lower <- hr_past_estimates$estimate - (1.96*hr_past_estimates$std.error)
hr_past_estimates$man_upper <- hr_past_estimates$estimate + (1.96*hr_past_estimates$std.error)

# These indeed seem to be identical to those in the figure

hr_past_estimates$lower <- hr_past_estimates$man_lower
hr_past_estimates$upper <- hr_past_estimates$man_upper

# Merge
# Add these to the t+1, t+2,... estimates table created above

estimates_full <- rbind(hr_past_estimates, setNames(results_table, names(hr_past_estimates)))

estimates_full$Time <- factor(estimates_full$Time, levels = c("t-4", "t-3", "t-2", "t-1", "t+0", "t+1", "t+2", "t+3"))

# Plot the whole thing
results_plot_shoh <- ggplot(estimates_full,
                              aes(Time, estimate, group = 1))+
  geom_hline(yintercept=0) +
  geom_vline(
    aes(xintercept =  stage(Time, after_scale = 4.5)),
    linetype = "dashed") +
  geom_line()+
  geom_errorbar(width=.1, aes(ymin = lower, ymax = upper)) +
  geom_point(shape=21, size=3, fill = "white") +
  scale_y_continuous(limits = c(-0.5, 0.5),
                     breaks = seq(-0.5, 0.5, by = 0.1),
                     labels = scales::number_format(accuracy = 0.1))+
  annotate("text", x = 4.35, y= 0.25, label = "Sanctions imposition", hjust = 1)+
  labs(y = "Estimated Difference in Human Rights\n(HRS, -3.5--5.5)",
       x = element_blank()
  ) +
  theme_clean()

results_plot_shoh

ggsave("99-Fig-A15.png", results_plot_shoh, width = 7, height = 2.75, dpi = 300)





### Appendix A8: excluding UN sanctions

# @knitr Fig-UN-sanc-1
# Figure A4 -- democracy outcome

# First recompile the main dataframe with the UN excluded
# The following is a simplified version of the IST code as used above.

IST <- read_excel("IST_2.0_20230624_lim.xlsx")

# Clean dates
IST$start <- dmy(IST$startdate)
IST$end   <- dmy(IST$terdate)
IST$start_year <- year(IST$start)
IST$end_year   <- year(IST$end)

# Fill "2050" for ongoing episodes
IST$end_year <- ifelse(is.na(IST$end_year), 2050, IST$end_year)

# "Any sanction" variable
IST$any_sanc_YN <- 1

# Democracy and HR sanctions YN/combined
IST$DM_YN       <- grepl("DM", IST$goals, fixed = TRUE)
IST$HR_YN       <- grepl("HR", IST$goals, fixed = TRUE)

# Remove Human Trafficking from HR_YN
# See manuscript for reasoning
IST$HT_YN       <- grepl("Human Trafficking Act", IST$comment, fixed = TRUE)
IST$HR_YN       <- ifelse((IST$HT_YN == TRUE), FALSE, IST$HR_YN)

# Final Dem Sanc variable (= HR and/or DM)
IST$Dem_Sanc_YN <- ifelse((IST$DM_YN == TRUE | IST$HR_YN == TRUE), 1, 0)

# Senders: Keep only EU/EEC and US -- this is the difference to the main analysis
# vS/W only include US and EU, but UN sanctions  by definition require US, FRA, GBR support.
IST <- IST %>% filter(sender == 2 | sender == 1653 | sender == 1830)

# Target names and V-Dem code
IST$target_vdem <- countrycode(IST$target, "cown", "vdem")
# Some values were not matched unambiguously: 54, 57, 58, 80, 345, 990
# Assign Serbia
IST$target_vdem <- ifelse(IST$target == 345, IST$target_vdem == 198, IST$target_vdem)

# Create country-year data from episode data
IST_CY <- IST %>%
  group_by(caseid) %>%                     # index = episode ID
  dplyr::mutate(year = list(c(start_year:end_year))) %>%
  unnest(year)

# Cut everything after 2021 (had added "2050" as end for episodes ongoing as of 2021.)
IST_CY <- IST_CY %>% subset(year < 2022)

# Drop ICC
IST_CY_nonUN <- IST_CY %>%  filter(!grepl("ICC", comment))

# CY variable
IST_CY_nonUN$country_year    <- paste(IST_CY_nonUN$target_vdem,   IST_CY_nonUN$year,    sep = "_")

# Merge into main df
final_df5_90_21 <- read.csv("final_df5_90_21_b.csv")
final_df5_90_21$country_year <- paste(final_df5_90_21$country_id,
                                      final_df5_90_21$year,
                                      sep = "_")

# Merge
final_df5_90_21_non_UN <- merge(final_df5_90_21,
                                IST_CY_nonUN,
                                by = "country_year",
                                all.x = TRUE)

# Remove duplicates, make sure sanctions Y/N data is retained
final_df5_90_21_non_UN <- final_df5_90_21_non_UN %>%
  group_by(country_year) %>%
  slice_max(order_by = Dem_Sanc_YN.y, n = 1, with_ties = FALSE) %>%
  ungroup()

# NA Dem_Sanc to zero
final_df5_90_21_non_UN$Dem_Sanc_YN.y <- ifelse(is.na(final_df5_90_21_non_UN$Dem_Sanc_YN.y), 0, final_df5_90_21_non_UN$Dem_Sanc_YN.y)

# Cut down
final_df5_90_21_non_UN <-  dplyr::select(final_df5_90_21_non_UN,
                                         year.x,
                                         country_id,
                                         Dem_Sanc_YN.y,
                                         t_events_count,
                                         intra_war_increase_YN,
                                         v2cademmob,
                                         dist_EU_US,
                                         theta_mean,
                                         v2x_polyarchy)

final_df5_90_21_non_UN <- as.data.frame(final_df5_90_21_non_UN)

write_csv(final_df5_90_21_non_UN, "final_df5_90_21_non_UN.csv")

# Estimates
set.seed(123)

PM.results <- PanelMatch(lag = 4,                              
                         time.id = "year.x",
                         unit.id = "country_id",
                         treatment = "Dem_Sanc_YN.y",
                         refinement.method = "CBPS.weight",
                         data = final_df5_90_21_non_UN,
                         covs.formula = ~ 
                           I(lag(t_events_count, 0:4))         +
                           I(lag(intra_war_increase_YN, 0:4))  +
                           I(lag(v2cademmob, 1:4))          +
                           I(lag(dist_EU_US, 1:4))        ,  
                         qoi = "att",
                         outcome.var = "v2x_polyarchy",
                         lead = 0:3,                 
                         forbid.treatment.reversal = FALSE,
                         use.diagonal.variance.matrix = TRUE,
                         placebo.test = TRUE
)

# Check balance
balance <- get_covariate_balance(PM.results$att,
                                 data = final_df5_90_21_non_UN,
                                 covariates = c(
                                   "t_events_count",  # Only at 0 and 1
                                   "intra_war_increase_YN",
                                   "v2cademmob",
                                   "dist_EU_US"
                                 ), 
                                 ylab = "SD",
                                 plot = F)

# Won't print the balance table for the the appendix -- the balance looks good

# Results
PE.results <- PanelEstimate(sets = PM.results,
                            number.iterations = 1000,
                            data = final_df5_90_21_non_UN)

#PE.results[["estimates"]]
#plot(PE.results)
#summary(PE.results)

capture <- capture.output(results_table <- summary(PE.results))

results_table <- results_table[["summary"]]
results_table <- as.data.frame(results_table)
results_table <- tibble::rownames_to_column(results_table,     "Time")

results_table$lower <- results_table$`2.5%`
results_table$upper <- results_table$`97.5%`

# Manually calculate 95% CIs just to be safe and to double-check
results_table$man_lower <- results_table$estimate - (1.96*results_table$std.error)
results_table$man_upper <- results_table$estimate + (1.96*results_table$std.error)

# Levels of t, so ggplot doesn't switch to alphabetical
results_table$Time <- as.character(results_table$Time)
results_table$Time <- factor(results_table$Time, levels=unique(results_table$Time))

# Add a zero for t-1, as Cunningham has and previous versions of PanelMatch had -- for ease of interpretation
results_table <- results_table %>% add_row(Time = "t-1",
                                           estimate = 0,
                                           .before = 1)

### Placebo tests to estimate the pre tratment period

set.seed(123)

placebo_tab <- placebo_test(PM.results, data = final_df5_90_21_non_UN, number.iterations = 1000, plot = F, confidence.level = 0.95)

dm_estimates <- placebo_tab[["bootstrapped.estimates"]]
dm_estimates <- as.data.frame(dm_estimates)

dm_estimates_mean_t4 <- mean(dm_estimates$`t-4`)
dm_estimates_mean_t3 <- mean(dm_estimates$`t-3`)
dm_estimates_mean_t2 <- mean(dm_estimates$`t-2`)

# https://github.com/insongkim/PanelMatch/issues/132#issuecomment-1795026977
# install_github("insongkim/PanelMatch", dependencies=TRUE, ref = "se_comparison")

pt <- placebo_test(PM.results, data = final_df5_90_21_non_UN, se.method = "unconditional", plot = FALSE)

ses_past <- pt[["standard.errors"]]

# Create table
dm_past_estimates <- data.frame(
  Time = c("t-4", "t-3", "t-2"),
  estimate = c(dm_estimates_mean_t4, dm_estimates_mean_t3, dm_estimates_mean_t2),
  std.error = ses_past,
  `2.5%` = NA,
  `97.5%` = NA,
  lower = NA,
  upper = NA,
  man_lower = NA,
  man_upper = NA
)

# Manually calculate 95% CIs just to be safe and to double-check, as above --
dm_past_estimates$man_lower <- dm_past_estimates$estimate - (1.96*dm_past_estimates$std.error)
dm_past_estimates$man_upper <- dm_past_estimates$estimate + (1.96*dm_past_estimates$std.error)

# These indeed seem to be identical to those in the figure

dm_past_estimates$lower <- dm_past_estimates$man_lower
dm_past_estimates$upper <- dm_past_estimates$man_upper

# Merge
# Add these to the t+1, t+2,... estimates table created above

estimates_full <- rbind(dm_past_estimates, setNames(results_table, names(dm_past_estimates)))

estimates_full$Time <- factor(estimates_full$Time, levels = c("t-4", "t-3", "t-2", "t-1", "t+0", "t+1", "t+2", "t+3"))

# Plot the whole thing
results_plot_dem_nonUN <- ggplot(estimates_full,
                           aes(Time, estimate, group = 1))+
  geom_hline(yintercept=0) +
  geom_vline(
    aes(xintercept =  stage(Time, after_scale = 4.5)),
    linetype = "dashed") +
  geom_line()+
  geom_errorbar(width=.1, aes(ymin = lower, ymax = upper)) +
  geom_point(shape=21, size=3, fill = "white") +
  scale_y_continuous(limits = c(-0.135, 0.12),
                     breaks = seq(0.12, -0.125, by = -0.02),
                     labels = scales::number_format(accuracy = 0.01))+
  annotate("text", x = 4.35, y= 0.05, label = "Sanctions imposition", hjust = 1)+
  labs(y = "Estimated Difference in Democracy\n(V-Dem Polyarchy, 0-1)",
       x = element_blank()
  ) +
  theme_clean()

results_plot_dem_nonUN

ggsave("99-Fig-A04.png", results_plot_dem_nonUN, width = 7, height = 2.75, dpi = 300)




# Figure A5 -- human rights outcome
# @knitr Fig-UN-sanc-2

final_df5_90_21_non_UN <- read_csv("final_df5_90_21_non_UN.csv")

final_df5_90_21_non_UN <- as.data.frame(final_df5_90_21_non_UN)

# Estimates
set.seed(123)

PM.results <- PanelMatch(lag = 4,                   
                         time.id = "year.x",
                         unit.id = "country_id",
                         treatment = "Dem_Sanc_YN.y",
                         refinement.method = "CBPS.weight",
                         data = final_df5_90_21_non_UN,
                         covs.formula = ~ 
                           I(lag(t_events_count, 0:4))         +
                           I(lag(intra_war_increase_YN, 0:4))  +
                           I(lag(v2cademmob, 1:4))          +
                           I(lag(dist_EU_US, 1:4))        ,  
                         qoi = "att",
                         outcome.var = "theta_mean",
                         lead = 0:3,             
                         forbid.treatment.reversal = FALSE,
                         use.diagonal.variance.matrix = TRUE,
                         placebo.test = TRUE
)

# Check balance
balance <- get_covariate_balance(PM.results$att,
                                 data = final_df5_90_21_non_UN,
                                 covariates = c(
                                   "t_events_count",  # Only at 0 and 1
                                   "intra_war_increase_YN",
                                   "v2cademmob",
                                   "dist_EU_US"
                                 ), 
                                 ylab = "SD",
                                 plot = F)

# Won't print the balance table for the the appendix -- the balance looks good

# Results
PE.results <- PanelEstimate(sets = PM.results,
                            number.iterations = 1000,
                            data = final_df5_90_21_non_UN)

#PE.results[["estimates"]]
#plot(PE.results)
#summary(PE.results)

capture <- capture.output(results_table <- summary(PE.results))

results_table <- results_table[["summary"]]
results_table <- as.data.frame(results_table)
results_table <- tibble::rownames_to_column(results_table,     "Time")

results_table$lower <- results_table$`2.5%`
results_table$upper <- results_table$`97.5%`

# Manually calculate 95% CIs just to be safe and to double-check
results_table$man_lower <- results_table$estimate - (1.96*results_table$std.error)
results_table$man_upper <- results_table$estimate + (1.96*results_table$std.error)

# Levels of t, so ggplot doesn't switch to alphabetical
results_table$Time <- as.character(results_table$Time)
results_table$Time <- factor(results_table$Time, levels=unique(results_table$Time))

# Add a zero for t-1, as Cunningham has and previous versions of PanelMatch had -- for ease of interpretation
results_table <- results_table %>% add_row(Time = "t-1",
                                           estimate = 0,
                                           .before = 1)

### Placebo tests to estimate the pre tratment period

set.seed(123)

placebo_tab <- placebo_test(PM.results, data = final_df5_90_21_non_UN, number.iterations = 1000, plot = F, confidence.level = 0.95)

hr_estimates <- placebo_tab[["bootstrapped.estimates"]]
hr_estimates <- as.data.frame(hr_estimates)

hr_estimates_mean_t4 <- mean(hr_estimates$`t-4`)
hr_estimates_mean_t3 <- mean(hr_estimates$`t-3`)
hr_estimates_mean_t2 <- mean(hr_estimates$`t-2`)

# https://github.com/insongkim/PanelMatch/issues/132#issuecomment-1795026977
# install_github("insongkim/PanelMatch", dependencies=TRUE, ref = "se_comparison")

pt <- placebo_test(PM.results, data = final_df5_90_21_non_UN, se.method = "unconditional", plot = FALSE)

ses_past <- pt[["standard.errors"]]

# Create table
dm_past_estimates <- data.frame(
  Time = c("t-4", "t-3", "t-2"),
  estimate = c(hr_estimates_mean_t4, hr_estimates_mean_t3, hr_estimates_mean_t2),
  std.error = ses_past,
  `2.5%` = NA,
  `97.5%` = NA,
  lower = NA,
  upper = NA,
  man_lower = NA,
  man_upper = NA
)

# Manually calculate 95% CIs just to be safe and to double-check, as above --
hr_past_estimates$man_lower <- hr_past_estimates$estimate - (1.96*hr_past_estimates$std.error)
hr_past_estimates$man_upper <- hr_past_estimates$estimate + (1.96*hr_past_estimates$std.error)

# These indeed seem to be identical to those in the figure

hr_past_estimates$lower <- hr_past_estimates$man_lower
hr_past_estimates$upper <- hr_past_estimates$man_upper

# Merge
# Add these to the t+1, t+2,... estimates table created above

estimates_full <- rbind(hr_past_estimates, setNames(results_table, names(hr_past_estimates)))

estimates_full$Time <- factor(estimates_full$Time, levels = c("t-4", "t-3", "t-2", "t-1", "t+0", "t+1", "t+2", "t+3"))

# Plot the whole thing
results_plot_hr_nonUN <- ggplot(estimates_full,
                                aes(Time, estimate, group = 1))+
  geom_hline(yintercept=0) +
  geom_vline(
    aes(xintercept =  stage(Time, after_scale = 4.5)),
    linetype = "dashed") +
  geom_line()+
  geom_errorbar(width=.1, aes(ymin = lower, ymax = upper)) +
  geom_point(shape=21, size=3, fill = "white") +
  scale_y_continuous(limits = c(-0.5, 0.5),
                     breaks = seq(-0.5, 0.5, by = 0.1),
                     labels = scales::number_format(accuracy = 0.1))+
  annotate("text", x = 4.35, y= 0.25, label = "Sanctions imposition", hjust = 1)+
  labs(y = "Estimated Difference in Human Rights\n(HRS, -3.5-5.5)",
       x = element_blank()
  ) +
  theme_clean()

results_plot_hr_nonUN

ggsave("99-Fig-A05.png", results_plot_hr_nonUN, width = 7, height = 2.75, dpi = 300)

### Stop logging
#   sink()
